Introducing Pinned Object Heap (#32283)
author	Vladimir Sadov <vsadov@microsoft.com>
Mon, 17 Feb 2020 15:26:21 +0000 (07:26 -0800)
committer	GitHub <noreply@github.com>
Mon, 17 Feb 2020 15:26:21 +0000 (07:26 -0800)
* Introducing Pinned Object Heap

* PR feedback

* Reverted a test-only change

src/coreclr/src/gc/gc.cpp
src/coreclr/src/gc/gc.h
src/coreclr/src/gc/gcpriv.h

diff --git a/src/coreclr/src/gc/gc.cpp b/src/coreclr/src/gc/gc.cpp
index abbabb6..44020b3 100644
@@ -2409,7 +2409,10 @@ static static_data static_data_table[latency_level_last - latency_level_first +
         // gen2
         {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, 100000, 100},
         // loh
-        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}
+        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
+        // poh
+        // TODO: tuning https://github.com/dotnet/runtime/issues/13739
+        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
     },
 
     // latency_level_balanced
@@ -2427,6 +2430,9 @@ static static_data static_data_table[latency_level_last - latency_level_first +
         // gen2
         {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, 100000, 100},
         // loh
+        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
+        // poh
+        // TODO: tuning https://github.com/dotnet/runtime/issues/13739
         {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}
     },
 };
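
Each initializer row above maps onto the static_data struct declared in gcpriv.h, and the new poh row is a verbatim copy of the loh tuning until the linked issue lands. A reading aid, taking the field order in gcpriv.h of this era as given (the comments are editorial, decoding the copied row):

    // {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0} decodes as:
    struct static_data
    {
        size_t   min_size;                    // 3 MB initial allocation budget
        size_t   max_size;                    // SSIZE_T_MAX: effectively unbounded
        size_t   fragmentation_limit;         // 0: no absolute fragmentation trigger
        float    fragmentation_burden_limit;  // 0.0f: no relative fragmentation trigger
        float    limit;                       // 1.25f: budget growth factor
        float    max_limit;                   // 4.5f: cap on budget growth
        uint64_t time_clock;                  // 0: no time-based collection trigger
        size_t   gc_clock;                    // 0: no GC-count-based trigger
    };
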
@@ -2758,6 +2764,8 @@ size_t      gc_heap::bgc_overflow_count = 0;
 
 size_t      gc_heap::bgc_begin_loh_size = 0;
 size_t      gc_heap::end_loh_size = 0;
+size_t      gc_heap::bgc_begin_poh_size = 0;
+size_t      gc_heap::end_poh_size = 0;
 
 #ifdef BGC_SERVO_TUNING
 uint64_t    gc_heap::loh_a_no_bgc = 0;
@@ -2773,6 +2781,8 @@ uint32_t    gc_heap::bgc_alloc_spin_uoh = 0;
 
 size_t      gc_heap::bgc_loh_size_increased = 0;
 
+size_t      gc_heap::bgc_poh_size_increased = 0;
+
 size_t      gc_heap::background_soh_alloc_count = 0;
 
 size_t      gc_heap::background_uoh_alloc_count = 0;
@@ -2957,6 +2967,7 @@ BOOL        gc_heap::heap_analyze_enabled = FALSE;
 
 alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST-1];
 alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST-1];
+alloc_list gc_heap::poh_alloc_list [NUM_POH_ALIST-1];
 
 dynamic_data gc_heap::dynamic_data_table [total_generation_count];
 gc_history_per_heap gc_heap::gc_data_per_heap;
@@ -3002,6 +3013,7 @@ CFinalize*  gc_heap::finalize_queue = 0;
 VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_soh;
 VOLATILE(bool) gc_heap::card_mark_done_soh;
 VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_loh;
+VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_poh;
 VOLATILE(bool) gc_heap::card_mark_done_uoh;
 #endif // FEATURE_CARD_MARKING_STEALING
 
@@ -4332,13 +4344,16 @@ typedef struct
     imemory_data *initial_memory;
     imemory_data *initial_normal_heap; // points into initial_memory_array
     imemory_data *initial_large_heap;  // points into initial_memory_array
+    imemory_data *initial_pinned_heap; // points into initial_memory_array
 
     size_t block_size_normal;
     size_t block_size_large;
+    size_t block_size_pinned;
 
     int block_count;                // # of blocks in each
     int current_block_normal;
     int current_block_large;
+    int current_block_pinned;
 
     enum 
     { 
@@ -4355,6 +4370,7 @@ typedef struct
         {
             case 0: return block_size_normal;
             case 1: return block_size_large;
+            case 2: return block_size_pinned;
             default: __UNREACHABLE();
         }
     };
@@ -4367,6 +4383,7 @@ typedef struct
             case soh_gen1: 
             case soh_gen2: return initial_normal_heap[h_number].memory_base;
             case loh_generation: return initial_large_heap[h_number].memory_base;
+            case poh_generation: return initial_pinned_heap[h_number].memory_base;
             default: __UNREACHABLE();
         }
     };
@@ -4379,6 +4396,7 @@ typedef struct
             case soh_gen1: 
             case soh_gen2: return block_size_normal;
             case loh_generation: return block_size_large;
+            case poh_generation: return block_size_pinned;
             default: __UNREACHABLE();
         }
     };
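
The three accessors above assume one flat imemory_data array laid out as all normal blocks, then all large blocks, then all pinned blocks, one block per heap; the widened loop in destroy_initial_memory below relies on the same layout. A minimal sketch of that index arithmetic, assuming the switch expression in block_size (not visible in this hunk) divides the flat index by block_count:

    // Flat layout with N = block_count heaps:
    //   index 0..N-1   -> normal (soh) blocks
    //   index N..2N-1  -> large  (loh) blocks
    //   index 2N..3N-1 -> pinned (poh) blocks
    size_t block_size_for_index (int index, int block_count,
                                 size_t normal, size_t large, size_t pinned)
    {
        switch (index / block_count)  // 0, 1, or 2
        {
            case 0:  return normal;
            case 1:  return large;
            case 2:  return pinned;
            default: return 0;        // unreachable for valid indices
        }
    }
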
@@ -4387,15 +4405,15 @@ typedef struct
 
 initial_memory_details memory_details;
 
-BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heaps, bool use_large_pages_p)
+BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p)
 {
     BOOL reserve_success = FALSE;
 
     // should only be called once
     assert (memory_details.initial_memory == 0);
 
-    // soh + loh segments * num_heaps  
-    memory_details.initial_memory = new (nothrow) imemory_data[num_heaps * (total_generation_count - ephemeral_generation_count)];
+    // soh + loh + poh segments * num_heaps
+    memory_details.initial_memory = new (nothrow) imemory_data[num_heaps * (total_generation_count - ephemeral_generation_count)]; 
     if (memory_details.initial_memory == 0)
     {
         dprintf (2, ("failed to reserve %Id bytes for imemory_data", num_heaps * (total_generation_count - ephemeral_generation_count) * sizeof (imemory_data)));
@@ -4404,13 +4422,16 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
 
     memory_details.initial_normal_heap = memory_details.initial_memory;
     memory_details.initial_large_heap = memory_details.initial_normal_heap + num_heaps;
+    memory_details.initial_pinned_heap = memory_details.initial_large_heap + num_heaps;
     memory_details.block_size_normal = normal_size;
     memory_details.block_size_large = large_size;
+    memory_details.block_size_pinned = pinned_size;
 
     memory_details.block_count = num_heaps;
 
     memory_details.current_block_normal = 0;
     memory_details.current_block_large = 0;
+    memory_details.current_block_pinned = 0;
 
     g_gc_lowest_address = MAX_PTR;
     g_gc_highest_address = 0;
@@ -4422,13 +4443,13 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
         return FALSE;
     }
 
-    if (((size_t)MAX_PTR / memory_details.block_count) < (normal_size + large_size))
+    if (((size_t)MAX_PTR / memory_details.block_count) < (normal_size + large_size + pinned_size))
     {
         dprintf (2, ("(0x%Ix + 0x%Ix)*0x%Ix overflow", normal_size, large_size, memory_details.block_count));
         return FALSE;
     }
 
-    size_t requestedMemory = memory_details.block_count * (normal_size + large_size);
+    size_t requestedMemory = memory_details.block_count * (normal_size + large_size + pinned_size);
 
     uint8_t* allatonce_block = (uint8_t*)virtual_alloc (requestedMemory, use_large_pages_p);
     if (allatonce_block)
@@ -4443,27 +4464,32 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
                                                              (i * normal_size);
             memory_details.initial_large_heap[i].memory_base = allatonce_block +
                 (memory_details.block_count * normal_size) + (i * large_size);
+            memory_details.initial_pinned_heap[i].memory_base = allatonce_block +
+                (memory_details.block_count * (normal_size + large_size)) + (i * pinned_size);
 
             reserve_success = TRUE;
         }
     }
     else
     {
-        // try to allocate 2 blocks
+        // try to allocate 3 blocks
         uint8_t* b1 = (uint8_t*)virtual_alloc (memory_details.block_count * normal_size, use_large_pages_p);
         uint8_t* b2 = (uint8_t*)virtual_alloc (memory_details.block_count * large_size, use_large_pages_p);
+        uint8_t* b3 = (uint8_t*)virtual_alloc (memory_details.block_count * pinned_size, use_large_pages_p);
 
-        if (b1 && b2)
+        if (b1 && b2 && b3)
         {
             memory_details.allocation_pattern = initial_memory_details::EACH_GENERATION;
-            g_gc_lowest_address = min (b1, b2);
+            g_gc_lowest_address = min (b1, min(b2, b3));
             g_gc_highest_address = max (b1 + memory_details.block_count * normal_size,
-                                        b2 + memory_details.block_count * large_size);
+                                   max (b2 + memory_details.block_count * large_size,
+                                        b3 + memory_details.block_count * pinned_size));
 
             for (int i = 0; i < memory_details.block_count; i++)
             {
                 memory_details.initial_normal_heap[i].memory_base = b1 + (i * normal_size);
                 memory_details.initial_large_heap[i].memory_base = b2 + (i * large_size);
+                memory_details.initial_pinned_heap[i].memory_base = b3 + (i * pinned_size);
             }
 
             reserve_success = TRUE;
@@ -4476,6 +4502,8 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
                 virtual_free (b1, memory_details.block_count * normal_size);
             if (b2)
                 virtual_free (b2, memory_details.block_count * large_size);
+            if (b3)
+                virtual_free (b3, memory_details.block_count * pinned_size);
         }
 
         if ((b2 == NULL) && (memory_details.block_count > 1))
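
When the single all-at-once reservation succeeds, the three regions are carved out of one contiguous range: all soh blocks first, then all loh blocks, then all poh blocks. A standalone restatement of the base-address arithmetic from the hunk above (the names here are illustrative, not the GC's):

    #include <cstddef>
    #include <cstdint>

    struct heap_bases { uint8_t* soh; uint8_t* loh; uint8_t* poh; };

    // Base addresses for heap i inside one contiguous reservation of
    // n_heaps * (normal_size + large_size + pinned_size) bytes.
    heap_bases bases_for_heap (uint8_t* block, int n_heaps, int i,
                               size_t normal_size, size_t large_size, size_t pinned_size)
    {
        heap_bases b;
        b.soh = block + i * normal_size;
        b.loh = block + n_heaps * normal_size + i * large_size;
        b.poh = block + n_heaps * (normal_size + large_size) + i * pinned_size;
        return b;
    }
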
@@ -4533,12 +4561,15 @@ void destroy_initial_memory()
 
             virtual_free (memory_details.initial_large_heap[0].memory_base,
                 memory_details.block_count*memory_details.block_size_large);
+
+            virtual_free (memory_details.initial_pinned_heap[0].memory_base,
+                memory_details.block_count*memory_details.block_size_pinned);
        }
         else
         {
             assert (memory_details.allocation_pattern == initial_memory_details::EACH_BLOCK);
             imemory_data *current_block = memory_details.initial_memory;
-            for(int i = 0; i < (memory_details.block_count*2); i++, current_block++)
+            for (int i = 0; i < (memory_details.block_count*(total_generation_count - ephemeral_generation_count)); i++, current_block++)
             {
                 size_t block_size = memory_details.block_size (i);
                 if (current_block->memory_base != NULL)
@@ -4552,6 +4583,7 @@ void destroy_initial_memory()
         memory_details.initial_memory = NULL;
         memory_details.initial_normal_heap = NULL;
         memory_details.initial_large_heap = NULL;
+        memory_details.initial_pinned_heap = NULL;
     }
 }
 
@@ -5012,7 +5044,9 @@ heap_segment* gc_heap::get_segment_for_uoh (int gen_number, size_t size
 #ifdef MULTIPLE_HEAPS
         heap_segment_heap (res) = hp;
 #endif //MULTIPLE_HEAPS
-        res->flags |= heap_segment_flags_loh;
+        res->flags |= gen_number == poh_generation ?
+                                        heap_segment_flags_poh :
+                                        heap_segment_flags_loh;
 
         FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(res), (size_t)(heap_segment_reserved (res) - heap_segment_mem(res)), gc_etw_segment_large_object_heap);
 
@@ -6420,7 +6454,7 @@ void gc_heap::fix_uoh_allocation_area (BOOL for_gc_p)
 {
     UNREFERENCED_PARAMETER(for_gc_p);
 
-    for(int i = uoh_start_generation; i < total_generation_count; i++)
+    for (int i = uoh_start_generation; i < total_generation_count; i++)
     {
 #ifdef _DEBUG
         alloc_context* acontext =
@@ -8201,7 +8235,7 @@ void gc_heap::copy_brick_card_range (uint8_t* la, uint32_t* old_card_table,
 
         // We don't need to go through all the card tables here because
         // we only need to copy from the GC version of the mark array - when we
-        // mark (even in allocate_large_object) we always use that mark array.
+        // mark (even in allocate_uoh_object) we always use that mark array.
         if ((card_table_highest_address (old_ct) >= start) &&
             (card_table_lowest_address (old_ct) <= end))
         {
@@ -10374,7 +10408,8 @@ size_t gc_heap::get_segment_size_hard_limit (uint32_t* num_heaps, bool should_ad
 }
 
 HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
-                                size_t loh_segment_size
+                                size_t loh_segment_size,
+                                size_t poh_segment_size
 #ifdef MULTIPLE_HEAPS
                                 ,int number_of_heaps
 #endif //MULTIPLE_HEAPS
@@ -10494,7 +10529,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
 #endif //BACKGROUND_GC
 
     reserved_memory = 0;
-    size_t initial_heap_size = soh_segment_size + loh_segment_size;
+    size_t initial_heap_size = soh_segment_size + loh_segment_size + poh_segment_size;
 #ifdef MULTIPLE_HEAPS
     reserved_memory_limit = initial_heap_size * number_of_heaps;
 #else //MULTIPLE_HEAPS
@@ -10507,7 +10542,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
         check_commit_cs.Initialize();
     }
 
-    if (!reserve_initial_memory (soh_segment_size, loh_segment_size, number_of_heaps, use_large_pages_p))
+    if (!reserve_initial_memory (soh_segment_size, loh_segment_size, poh_segment_size, number_of_heaps, use_large_pages_p))
         return E_OUTOFMEMORY;
 
 #ifdef CARD_BUNDLE
@@ -11173,7 +11208,7 @@ gc_heap::init_gc_heap (int  h_number)
 #endif //!SEG_MAPPING_TABLE
 
 
-    // Create segments for the large generation
+    // Create segments for the large and pinned generations
     heap_segment* lseg = make_initial_segment(loh_generation, h_number);
     if (!lseg)
         return 0;
@@ -11184,19 +11219,36 @@ gc_heap::init_gc_heap (int  h_number)
                               (size_t)(heap_segment_reserved (lseg) - heap_segment_mem(lseg)),
                               gc_etw_segment_large_object_heap);
 
+    heap_segment* pseg = make_initial_segment(poh_generation, h_number);
+    if (!pseg)
+        return 0;
+
+    pseg->flags |= heap_segment_flags_poh;
+
+    FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(pseg),
+                              (size_t)(heap_segment_reserved (pseg) - heap_segment_mem(pseg)),
+                              gc_etw_segment_pinned_object_heap);
+
 #ifdef SEG_MAPPING_TABLE
     seg_mapping_table_add_segment (lseg, __this);
+    seg_mapping_table_add_segment (pseg, __this);
 #else //SEG_MAPPING_TABLE
     seg_table->insert ((uint8_t*)lseg, sdelta);
+    seg_table->insert ((uint8_t*)pseg, sdelta);
 #endif //SEG_MAPPING_TABLE
 
     make_generation (loh_generation, lseg, heap_segment_mem (lseg), 0);
+    make_generation (poh_generation, pseg, heap_segment_mem (pseg), 0);
 
     heap_segment_allocated (lseg) = heap_segment_mem (lseg) + Align (min_obj_size, get_alignment_constant (FALSE));
     heap_segment_used (lseg) = heap_segment_allocated (lseg) - plug_skew;
 
+    heap_segment_allocated (pseg) = heap_segment_mem (pseg) + Align (min_obj_size, get_alignment_constant (FALSE));
+    heap_segment_used (pseg) = heap_segment_allocated (pseg) - plug_skew;
+
     generation_of (max_generation)->free_list_allocator = allocator(NUM_GEN2_ALIST, BASE_GEN2_ALIST, gen2_alloc_list);
     generation_of (loh_generation)->free_list_allocator = allocator(NUM_LOH_ALIST, BASE_LOH_ALIST, loh_alloc_list);
+    generation_of (poh_generation)->free_list_allocator = allocator(NUM_POH_ALIST, BASE_POH_ALIST, poh_alloc_list);
 
     for (int gen_num = 0; gen_num < total_generation_count; gen_num++)
     {
@@ -11206,10 +11258,12 @@ gc_heap::init_gc_heap (int  h_number)
 
 #ifdef MULTIPLE_HEAPS
     heap_segment_heap (lseg) = this;
+    heap_segment_heap (pseg) = this;
 
     //initialize the alloc context heap
     generation_alloc_context (generation_of (soh_gen0))->set_alloc_heap(vm_heap);
     generation_alloc_context (generation_of (loh_generation))->set_alloc_heap(vm_heap);
+    generation_alloc_context (generation_of (poh_generation))->set_alloc_heap(vm_heap);
 
 #endif //MULTIPLE_HEAPS
 
@@ -11379,6 +11433,7 @@ gc_heap::init_gc_heap (int  h_number)
     background_uoh_alloc_count = 0;
     bgc_overflow_count = 0;
     end_loh_size = dd_min_size (dynamic_data_of (loh_generation));
+    end_poh_size = dd_min_size (dynamic_data_of (poh_generation));
 #endif //BACKGROUND_GC
 
 #ifdef GC_CONFIG_DRIVEN
@@ -13438,6 +13493,15 @@ int gc_heap::bgc_loh_allocate_spin()
     return bgc_allocate_spin(min_gc_size, bgc_begin_size, bgc_size_increased, end_size);
 }
 
+int gc_heap::bgc_poh_allocate_spin()
+{
+    size_t min_gc_size = dd_min_size (dynamic_data_of (poh_generation));
+    size_t bgc_begin_size = bgc_begin_poh_size;
+    size_t bgc_size_increased = bgc_poh_size_increased;
+    size_t end_size = end_poh_size;
+
+    return bgc_allocate_spin(min_gc_size, bgc_begin_size, bgc_size_increased, end_size);
+}
 #endif //BACKGROUND_GC
 
 size_t gc_heap::get_uoh_seg_size (size_t size)
@@ -13693,7 +13757,9 @@ allocation_state gc_heap::allocate_uoh (int gen_number,
             }
 #endif //BGC_SERVO_TUNING
 
-            int spin_for_allocation = bgc_loh_allocate_spin();
+            int spin_for_allocation = (gen_number == loh_generation) ?
+                bgc_loh_allocate_spin() :
+                bgc_poh_allocate_spin();
 
             if (spin_for_allocation >= 0)
             {
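
allocate_uoh now picks the spin helper by generation; both helpers hand generation-specific counters to the shared bgc_allocate_spin, and the caller treats a non-negative result as a request to back off before retrying. The sketch below illustrates only that contract, under assumed policy details; it is not the real bgc_allocate_spin body:

    #include <algorithm>
    #include <cstddef>

    // Illustrative throttle: -1 means allocate immediately; >= 0 means pause
    // before retrying so the in-flight background GC can catch up.
    int example_bgc_allocate_spin (size_t min_gc_size, size_t bgc_begin_size,
                                   size_t bgc_size_increased, size_t end_size)
    {
        // Assumed policy: no throttling while the heap stays within a small
        // multiple of its tuned minimum / last-known end-of-BGC size.
        size_t budget = std::max (end_size, 4 * min_gc_size);
        if (bgc_begin_size + bgc_size_increased < budget)
            return -1;

        // Otherwise back off in proportion to growth during this BGC.
        size_t unit = std::max (bgc_begin_size / 16, (size_t)1);
        return (int)std::min (bgc_size_increased / unit, (size_t)64);
    }
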
@@ -16408,7 +16474,8 @@ int gc_heap::generation_to_condemn (int n_initial,
             for (int i = 0; i < n_heaps; i++)
             {
                 if (((g_heaps[i]->current_generation_size (max_generation)) > bgc_min_per_heap) || 
-                    ((g_heaps[i]->current_generation_size (loh_generation)) > bgc_min_per_heap))
+                    ((g_heaps[i]->current_generation_size (loh_generation)) > bgc_min_per_heap) || 
+                    ((g_heaps[i]->current_generation_size (poh_generation)) > bgc_min_per_heap))
                 {
                     bgc_heap_too_small = FALSE;
                     break;
@@ -16416,7 +16483,8 @@ int gc_heap::generation_to_condemn (int n_initial,
             }
 #else //MULTIPLE_HEAPS
             if ((current_generation_size (max_generation) > bgc_min_per_heap) || 
-                (current_generation_size (loh_generation) > bgc_min_per_heap))
+                (current_generation_size (loh_generation) > bgc_min_per_heap) || 
+                (current_generation_size (poh_generation) > bgc_min_per_heap))
             {
                 bgc_heap_too_small = FALSE;
             }
@@ -17647,6 +17715,7 @@ void gc_heap::update_collection_counts ()
         if (i == max_generation)
         {
             dd_collection_count (dynamic_data_of (loh_generation))++;
+            dd_collection_count (dynamic_data_of (poh_generation))++;
         }
 
         dd_gc_clock (dd) = dd_gc_clock (dd0);
@@ -21146,9 +21215,12 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
 #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
             {
                 dprintf (3, ("Marking cross generation pointers for uoh objects on heap %d", heap_number));
-                for(int i = uoh_start_generation; i < total_generation_count; i++)
+                for (int i = uoh_start_generation; i < total_generation_count; i++)
                 {
-                    mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
+#ifndef ALLOW_REFERENCES_IN_POH
+                    if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                        mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
                 }
 
 #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
@@ -21172,9 +21244,12 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
                 if (!hp->card_mark_done_uoh)
                 {
                     dprintf(3, ("Marking cross generation pointers for large objects on heap %d", hp->heap_number));
-                    for(int i = uoh_start_generation; i < total_generation_count; i++)
+                    for (int i = uoh_start_generation; i < total_generation_count; i++)
                     {
-                        hp->mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
+#ifndef ALLOW_REFERENCES_IN_POH
+                        if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                            hp->mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
                     }
 
                     hp->card_mark_done_uoh = true;
@@ -23921,6 +23996,8 @@ void gc_heap::plan_phase (int condemned_gen_number)
             GCToEEInterface::DiagWalkLOHSurvivors(__this);
             sweep_uoh_objects (loh_generation);
         }
+
+        sweep_uoh_objects (poh_generation);
     }
     else
     {
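
A compacting plan never applies to the POH: pinned objects cannot move, so the heap is unconditionally swept and the gaps between survivors become free-list space for later allocations. A simplified sketch of in-place sweeping under that model (a flat object walk with caller-supplied helpers; the real sweep_uoh_objects also handles segments, background mark state, and free-object headers):

    #include <cstddef>
    #include <cstdint>

    template <typename IsMarked, typename NextObj, typename AddFree>
    void sweep_in_place (uint8_t* start, uint8_t* end,
                         IsMarked is_marked, NextObj next_obj, AddFree add_free)
    {
        uint8_t* free_start = nullptr;
        for (uint8_t* o = start; o < end; o = next_obj (o))
        {
            if (is_marked (o))
            {
                if (free_start)   // a dead run ends at this survivor
                {
                    add_free (free_start, (size_t)(o - free_start));
                    free_start = nullptr;
                }
            }
            else if (!free_start)
            {
                free_start = o;   // a dead run begins
            }
        }
        if (free_start)           // trailing dead run
            add_free (free_start, (size_t)(end - free_start));
    }
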
@@ -25894,8 +25971,14 @@ void gc_heap::relocate_phase (int condemned_gen_number,
         if (!card_mark_done_uoh)
 #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
         {
-            dprintf (3, ("Relocating cross generation pointers for large objects on heap %d", heap_number));
-            mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, loh_generation, TRUE THIS_ARG);
+            dprintf (3, ("Relocating cross generation pointers for uoh objects on heap %d", heap_number));
+            for (int i = uoh_start_generation; i < total_generation_count; i++)
+            {
+#ifndef ALLOW_REFERENCES_IN_POH
+                if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                    mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, i, TRUE THIS_ARG);
+            }
 
 #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
             card_mark_done_uoh = true;
@@ -25915,6 +25998,10 @@ void gc_heap::relocate_phase (int condemned_gen_number,
         {
             relocate_in_uoh_objects (loh_generation);
         }
+
+#ifdef ALLOW_REFERENCES_IN_POH
+        relocate_in_uoh_objects (poh_generation);
+#endif //ALLOW_REFERENCES_IN_POH
     }
 #ifndef FEATURE_CARD_MARKING_STEALING
     // moved this code *before* we scan the older generations via mark_through_cards_xxx
@@ -25958,8 +26045,14 @@ void gc_heap::relocate_phase (int condemned_gen_number,
 
             if (!hp->card_mark_done_uoh)
             {
-                dprintf(3, ("Relocating cross generation pointers for large objects on heap %d", hp->heap_number));
-                hp->mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, loh_generation, TRUE THIS_ARG);
+                dprintf(3, ("Relocating cross generation pointers for uoh objects on heap %d", hp->heap_number));
+                for (int i = uoh_start_generation; i < total_generation_count; i++)
+                {
+#ifndef ALLOW_REFERENCES_IN_POH
+                    if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                        hp->mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, i, TRUE THIS_ARG);
+                }
                 hp->card_mark_done_uoh = true;
             }
         }
@@ -27166,10 +27259,13 @@ void gc_heap::background_mark_phase ()
 
     size_t total_soh_size = generation_sizes (generation_of (max_generation));
     size_t total_loh_size = generation_size (loh_generation);
+    size_t total_poh_size = generation_size (poh_generation);
     bgc_begin_loh_size = total_loh_size;
+    bgc_begin_poh_size = total_poh_size;
     bgc_loh_size_increased = 0;
+    bgc_poh_size_increased = 0;
 
-    dprintf (GTC_LOG, ("BM: h%d: loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size));
+    dprintf (GTC_LOG, ("BM: h%d: loh: %Id, soh: %Id, poh: %Id", heap_number, total_loh_size, total_soh_size, total_poh_size));
 
     {
         //concurrent_print_time_delta ("copying stack roots");
@@ -27446,8 +27542,9 @@ void gc_heap::background_mark_phase ()
 
         total_soh_size = generation_sizes (generation_of (max_generation));
         total_loh_size = generation_size (loh_generation);
+        total_poh_size = generation_size (poh_generation);
 
-        dprintf (GTC_LOG, ("FM: h%d: loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size));
+        dprintf (GTC_LOG, ("FM: h%d: loh: %Id, soh: %Id, poh: %Id", heap_number, total_loh_size, total_soh_size, total_poh_size));
 
         dprintf (2, ("nonconcurrent marking stack roots"));
         GCScan::GcScanRoots(background_promote,
@@ -27591,8 +27688,9 @@ void gc_heap::background_mark_phase ()
 
     gen0_bricks_cleared = FALSE;
 
-    dprintf (2, ("end of bgc mark: loh: %d, soh: %d", 
+    dprintf (2, ("end of bgc mark: loh: %d, poh: %d, soh: %d", 
                  generation_size (loh_generation), 
+                 generation_size (poh_generation), 
                  generation_sizes (generation_of (max_generation))));
 
     for (int gen_idx = max_generation; gen_idx < total_generation_count; gen_idx++)
@@ -27903,7 +28001,7 @@ void gc_heap::revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p)
     bool reset_watch_state = !!concurrent_p;
     bool is_runtime_suspended = !concurrent_p;
     BOOL small_object_segments = TRUE;
-    for(int i = max_generation; i < total_generation_count; i++)
+    for (int i = max_generation; i < total_generation_count; i++)
     {
         heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i)));
         PREFIX_ASSUME(seg != NULL);
@@ -32798,6 +32896,10 @@ void gc_heap::compute_new_dynamic_data (int gen_number)
 #ifdef BACKGROUND_GC
             if (i == loh_generation)
                 end_loh_size = total_gen_size;
+
+            if (i == poh_generation)
+                end_poh_size = total_gen_size;
+
 #endif //BACKGROUND_GC
             //update counter
             dd_promoted_size (dd) = out;
@@ -33370,7 +33472,7 @@ BOOL gc_heap::ephemeral_gen_fit_p (gc_tuning_point tp)
     }
 }
 
-CObjectHeader* gc_heap::allocate_large_object (size_t jsize, uint32_t flags, int64_t& alloc_bytes)
+CObjectHeader* gc_heap::allocate_uoh_object (size_t jsize, uint32_t flags, int gen_number, int64_t& alloc_bytes)
 {
     //create a new alloc context because gen3context is shared.
     alloc_context acontext;
@@ -33393,17 +33495,19 @@ CObjectHeader* gc_heap::allocate_large_object (size_t jsize, uint32_t flags, int
 
     size_t size = AlignQword (jsize);
     int align_const = get_alignment_constant (FALSE);
-#ifdef FEATURE_LOH_COMPACTION
-    size_t pad = Align (loh_padding_obj_size, align_const);
-#else
     size_t pad = 0;
+#ifdef FEATURE_LOH_COMPACTION
+    if (gen_number == loh_generation)
+    {
+        pad = Align (loh_padding_obj_size, align_const);
+    }
 #endif //FEATURE_LOH_COMPACTION
 
     assert (size >= Align (min_obj_size, align_const));
 #ifdef _MSC_VER
 #pragma inline_depth(0)
 #endif //_MSC_VER
-    if (! allocate_more_space (&acontext, (size + pad), flags, loh_generation))
+    if (! allocate_more_space (&acontext, (size + pad), flags, gen_number))
     {
         return 0;
     }
@@ -33447,8 +33551,6 @@ CObjectHeader* gc_heap::allocate_large_object (size_t jsize, uint32_t flags, int
             mark_array_clear_marked (result);
         }
 #ifdef BACKGROUND_GC
-        //the object has to cover one full mark uint32_t
-        assert (size >= mark_word_size);
         if (current_c_gc_state != c_gc_state_free)
         {
             dprintf (3, ("Concurrent allocation of a large object %Ix",
@@ -34295,8 +34397,9 @@ void gc_heap::background_sweep()
 
     size_t total_soh_size = generation_sizes (generation_of (max_generation));
     size_t total_loh_size = generation_size (loh_generation);
+    size_t total_poh_size = generation_size (poh_generation);
 
-    dprintf (GTC_LOG, ("loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size));
+    dprintf (GTC_LOG, ("h%d: S: poh: %Id, loh: %Id, soh: %Id", heap_number, total_poh_size, total_loh_size, total_soh_size));
 
     dprintf (GTC_LOG, ("end of bgc sweep: gen2 FL: %Id, FO: %Id",
         generation_free_list_space (generation_of (max_generation)),
@@ -34307,6 +34410,11 @@ void gc_heap::background_sweep()
         generation_free_list_space (generation_of (loh_generation)),
         generation_free_obj_space (generation_of (loh_generation))));
 
+    dprintf (GTC_LOG, ("h%d: end of bgc sweep: poh FL: %Id, FO: %Id", 
+        heap_number,
+        generation_free_list_space (generation_of (poh_generation)),
+        generation_free_obj_space (generation_of (poh_generation))));
+
     FIRE_EVENT(BGC2ndConEnd);
     concurrent_print_time_delta ("background sweep");
 
@@ -34556,7 +34664,9 @@ void gc_heap::mark_through_cards_for_uoh_objects (card_fn fn,
     size_t total_cards_cleared = 0;
 
 #ifdef FEATURE_CARD_MARKING_STEALING
-    VOLATILE(uint32_t)* chunk_index = (VOLATILE(uint32_t)*) & card_mark_chunk_index_loh;
+    VOLATILE(uint32_t)* chunk_index = (VOLATILE(uint32_t)*) &(gen_num == loh_generation ? 
+        card_mark_chunk_index_loh : 
+        card_mark_chunk_index_poh);
 
     card_marking_enumerator card_mark_enumerator(seg, low, chunk_index);
     card_word_end = 0;
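
The chunk_index initialization above leans on a C++ rule worth spelling out: a conditional expression whose operands are lvalues of the same type is itself an lvalue, so taking its address yields the address of whichever variable was selected. A self-contained demonstration:

    #include <cassert>
    #include <cstdint>

    int main ()
    {
        uint32_t loh_index = 1, poh_index = 2;
        bool is_loh = false;
        // ?: over two same-typed lvalues is an lvalue; & picks the chosen object.
        uint32_t* chunk_index = &(is_loh ? loh_index : poh_index);
        assert (chunk_index == &poh_index);
        *chunk_index = 42;            // writes through to poh_index
        assert (poh_index == 42);
        return 0;
    }
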
@@ -35451,7 +35561,7 @@ void gc_heap::verify_partial ()
     BOOL bad_ref_p = FALSE;
     BOOL free_ref_p = FALSE;
 
-    for(int i = max_generation; i < total_generation_count; i++)
+    for (int i = max_generation; i < total_generation_count; i++)
     {
         generation* gen = generation_of (i);
         int align_const = get_alignment_constant (i == max_generation);
@@ -36350,9 +36460,11 @@ HRESULT GCHeap::Initialize()
 
 #ifdef MULTIPLE_HEAPS
     gc_heap::n_heaps = nhp;
-    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, nhp);
+    // TODO: tuning https://github.com/dotnet/runtime/issues/13739
+    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, large_seg_size /*poh_segment_size*/, nhp);
 #else
-    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/);
+    // TODO: tuning https://github.com/dotnet/runtime/issues/13739
+    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, large_seg_size /*poh_segment_size*/);
 #endif //MULTIPLE_HEAPS
 
     if (hr != S_OK)
@@ -36411,7 +36523,7 @@ HRESULT GCHeap::Initialize()
 #ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way
 #if defined (STRESS_HEAP) && !defined (MULTIPLE_HEAPS)
     if (GCStress<cfg_any>::IsEnabled())  {
-        for(int i = 0; i < GCHeap::NUM_HEAP_STRESS_OBJS; i++)
+        for (int i = 0; i < GCHeap::NUM_HEAP_STRESS_OBJS; i++)
         {
             m_StressObjs[i] = CreateGlobalHandle(0);
         }
@@ -37157,7 +37269,7 @@ GCHeap::AllocAlign8Common(void* _hp, alloc_context* acontext, size_t size, uint3
 
         alloc_context* acontext = generation_alloc_context (hp->generation_of (loh_generation));
 
-        newAlloc = (Object*) hp->allocate_large_object (size, flags, acontext->alloc_bytes_uoh);
+        newAlloc = (Object*) hp->allocate_uoh_object (size, flags, loh_generation, acontext->alloc_bytes_uoh);
         ASSERT(((size_t)newAlloc & 7) == 0);
     }
 
@@ -37202,7 +37314,7 @@ GCHeap::AllocLHeap( size_t size, uint32_t flags REQD_ALIGN_DCL)
 #endif //MULTIPLE_HEAPS
 
     alloc_context* acontext = generation_alloc_context (hp->generation_of (loh_generation));
-    newAlloc = (Object*) hp->allocate_large_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, acontext->alloc_bytes_uoh);
+    newAlloc = (Object*) hp->allocate_uoh_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, loh_generation, acontext->alloc_bytes_uoh);
 
 #ifdef FEATURE_STRUCTALIGN
     newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size);
@@ -37258,7 +37370,7 @@ GCHeap::Alloc(gc_alloc_context* context, size_t size, uint32_t flags REQD_ALIGN_
     }
     else
     {
-        newAlloc = (Object*) hp->allocate_large_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, acontext->alloc_bytes_uoh);
+        newAlloc = (Object*) hp->allocate_uoh_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, loh_generation, acontext->alloc_bytes_uoh);
 #ifdef FEATURE_STRUCTALIGN
         newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size);
 #endif // FEATURE_STRUCTALIGN
diff --git a/src/coreclr/src/gc/gc.h b/src/coreclr/src/gc/gc.h
index 2ac1710..51bb270 100644
@@ -89,7 +89,8 @@ enum gc_etw_segment_type
 {
     gc_etw_segment_small_object_heap = 0,
     gc_etw_segment_large_object_heap = 1,
-    gc_etw_segment_read_only_heap = 2
+    gc_etw_segment_read_only_heap = 2,
+    gc_etw_segment_pinned_object_heap = 3
 };
 
 // Types of allocations, emitted by the GCAllocationTick ETW event.
@@ -119,13 +120,16 @@ enum gc_generation_num
     // large object heap, technically not a generation, but it is convenient to represent it as such
     loh_generation = 3,
 
+    // pinned heap, a separate generation for the same reasons as loh
+    poh_generation = 4,
+
     uoh_start_generation = loh_generation,
 
     // number of ephemeral generations 
     ephemeral_generation_count = max_generation,
 
     // number of all generations 
-    total_generation_count = loh_generation + 1
+    total_generation_count = poh_generation + 1
 };
 
 #ifdef GC_CONFIG_DRIVEN
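
Because poh_generation is appended and total_generation_count is derived from it, every loop bounded by total_generation_count (or starting at uoh_start_generation) in gc.cpp picks up the new heap without further edits, which is why so many of the hunks above touch only loop headers. The relationships the enum now guarantees:

    // Implied values: soh_gen0..soh_gen2 = 0..2, loh_generation = 3, poh_generation = 4.
    static_assert (poh_generation == loh_generation + 1, "poh sits right after loh");
    static_assert (uoh_start_generation == loh_generation, "uoh scans start at loh");
    static_assert (total_generation_count == 5, "3 soh generations + loh + poh");
    static_assert (total_generation_count - ephemeral_generation_count == 3,
                   "soh, loh, poh: one reserved region each, as in reserve_initial_memory");
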
diff --git a/src/coreclr/src/gc/gcpriv.h b/src/coreclr/src/gc/gcpriv.h
index 952eb83..4801a19 100644
@@ -106,6 +106,8 @@ inline void FATAL_GC_ERROR()
 
 #define CARD_BUNDLE         //enable card bundle feature.(requires WRITE_WATCH)
 
+#define ALLOW_REFERENCES_IN_POH  //Allow POH objects to contain references.
+
 // If this is defined we use a map for segments in order to find the heap for
 // a segment fast. But it does use more memory as we have to cover the whole
 // heap range and for each entry we allocate a struct of 5 ptr-size words
@@ -1227,7 +1229,8 @@ public:
 
     static
     HRESULT initialize_gc  (size_t soh_segment_size,
-                            size_t loh_segment_size
+                            size_t loh_segment_size,
+                            size_t poh_segment_size
 #ifdef MULTIPLE_HEAPS
                             , int number_of_heaps
 #endif //MULTIPLE_HEAPS
@@ -1269,13 +1272,13 @@ public:
     void gc_thread_stub (void* arg);
 #endif //MULTIPLE_HEAPS
 
-    // For LOH allocations we only update the alloc_bytes_uoh in allocation
+    // For UOH allocations we only update the alloc_bytes_uoh in allocation
     // context - we don't actually use the ptr/limit from it so I am
     // making this explicit by not passing in the alloc_context.
-    // Note: This is an instance method, but the heap instance is only used for
+    // Note: These are instance methods, but the heap instance is only used for
     // lowest_address and highest_address, which are currently the same across all heaps.
     PER_HEAP
-    CObjectHeader* allocate_large_object (size_t size, uint32_t flags, int64_t& alloc_bytes);
+    CObjectHeader* allocate_uoh_object (size_t size, uint32_t flags, int gen_num, int64_t& alloc_bytes);
 
 #ifdef FEATURE_STRUCTALIGN
     PER_HEAP
@@ -1541,6 +1544,9 @@ protected:
 
     PER_HEAP
     BOOL bgc_loh_allocate_spin();
+
+    PER_HEAP
+    BOOL bgc_poh_allocate_spin();
 #endif //BACKGROUND_GC
 
 #define max_saved_spinlock_info 48
@@ -3724,7 +3730,11 @@ protected:
     PER_HEAP
     size_t     bgc_begin_loh_size;
     PER_HEAP
+    size_t     bgc_begin_poh_size;
+    PER_HEAP
     size_t     end_loh_size;
+    PER_HEAP
+    size_t     end_poh_size;
 
 #ifdef BGC_SERVO_TUNING
     PER_HEAP
@@ -3758,6 +3768,8 @@ protected:
     // in free list doesn't increase the heap size.
     PER_HEAP
     size_t     bgc_loh_size_increased;
+    PER_HEAP
+    size_t     bgc_poh_size_increased;
 
     PER_HEAP
     size_t     background_soh_alloc_count;
@@ -3895,6 +3907,7 @@ protected:
 
 #define youngest_generation (generation_of (0))
 #define large_object_generation (generation_of (loh_generation))
+#define pinned_object_generation (generation_of (poh_generation))
 
     // The more_space_lock and gc_lock is used for 3 purposes:
     //
@@ -3982,6 +3995,16 @@ protected:
     PER_HEAP
     alloc_list gen2_alloc_list[NUM_GEN2_ALIST-1];
 
+// TODO: tuning https://github.com/dotnet/runtime/issues/13739
+#define NUM_POH_ALIST (12)
+#ifdef BIT64
+#define BASE_POH_ALIST (1*256)
+#else
+#define BASE_POH_ALIST (1*128)
+#endif // BIT64
+    PER_HEAP
+    alloc_list poh_alloc_list[NUM_POH_ALIST-1];
+
 //------------------------------------------    
 
     PER_HEAP
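
poh_alloc_list plugs into the same bucketed free-list allocator as the gen2 and loh lists (see the allocator(...) wiring in init_gc_heap above): bucket capacities start at BASE_POH_ALIST and double, with the final bucket unbounded. A sketch of the size-to-bucket mapping under that assumption; the authoritative logic lives in the allocator class:

    #include <cstddef>

    // Illustrative mapping for NUM_POH_ALIST = 12, BASE_POH_ALIST = 256 (64-bit).
    int poh_bucket_for_size (size_t size)
    {
        const int num_buckets = 12;   // NUM_POH_ALIST
        size_t limit = 256;           // BASE_POH_ALIST on 64-bit
        for (int i = 0; i < num_buckets - 1; i++)
        {
            if (size < limit)
                return i;             // first bucket whose upper bound exceeds size
            limit *= 2;
        }
        return num_buckets - 1;       // overflow bucket for everything larger
    }
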
@@ -4272,6 +4295,9 @@ public:
     VOLATILE(uint32_t)    card_mark_chunk_index_loh;
 
     PER_HEAP
+    VOLATILE(uint32_t)    card_mark_chunk_index_poh;
+
+    PER_HEAP
     VOLATILE(bool)        card_mark_done_uoh;
 
     PER_HEAP
@@ -4282,6 +4308,7 @@ public:
         card_mark_done_soh = false;
 
         card_mark_chunk_index_loh = ~0;
+        card_mark_chunk_index_poh = ~0;
         card_mark_done_uoh = false;
     }
 
@@ -4772,6 +4799,7 @@ struct loh_padding_obj
 #define heap_segment_flags_ma_pcommitted 128
 #define heap_segment_flags_uoh_delete   256
 
+#define heap_segment_flags_poh          512
 #endif //BACKGROUND_GC
 
 //need to be careful to keep enough pad items to fit a relocation node
@@ -4860,7 +4888,7 @@ BOOL heap_segment_unmappable_p (heap_segment* inst)
 inline
 BOOL heap_segment_uoh_p (heap_segment * inst)
 {
-    return !!(inst->flags & heap_segment_flags_loh);
+    return !!(inst->flags & (heap_segment_flags_loh | heap_segment_flags_poh));
 }
 
 #ifdef BACKGROUND_GC
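
heap_segment_uoh_p now answers "large or pinned" with a single mask; call sites that must tell the two apart (as get_segment_for_uoh does when choosing a flag) would use a narrower predicate in the same style. A hypothetical companion sketch, mirroring the accessor above rather than quoting the commit, and noting that heap_segment_flags_poh is only defined under BACKGROUND_GC in this change:

    #ifdef BACKGROUND_GC
    inline
    BOOL heap_segment_poh_p (heap_segment * inst)
    {
        return !!(inst->flags & heap_segment_flags_poh);
    }
    #endif //BACKGROUND_GC
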