Introducing Pinned Object Heap (#32283)
author	Vladimir Sadov <vsadov@microsoft.com>
Mon, 17 Feb 2020 15:26:21 +0000 (07:26 -0800)
committer	GitHub <noreply@github.com>
Mon, 17 Feb 2020 15:26:21 +0000 (07:26 -0800)
* Introducing Pinned Object Heap

* PR feedback

* Reverted a test-only change

src/coreclr/src/gc/gc.cpp
src/coreclr/src/gc/gc.h
src/coreclr/src/gc/gcpriv.h

diff --git a/src/coreclr/src/gc/gc.cpp b/src/coreclr/src/gc/gc.cpp
index abbabb6..44020b3 100644
@@ -2409,7 +2409,10 @@ static static_data static_data_table[latency_level_last - latency_level_first +
         // gen2
         {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, 100000, 100},
         // loh
-        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}
+        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
+        // poh
+        // TODO: tuning https://github.com/dotnet/runtime/issues/13739
+        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
     },
 
     // latency_level_balanced
@@ -2427,6 +2430,9 @@ static static_data static_data_table[latency_level_last - latency_level_first +
         // gen2
         {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, 100000, 100},
         // loh
+        {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0},
+        // poh
+        // TODO: tuning https://github.com/dotnet/runtime/issues/13739
         {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}
     },
 };
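
Each initializer row above maps onto the static_data struct declared in gcpriv.h, and the new poh row is a verbatim copy of the loh tuning until the linked issue lands. A reading aid, taking the field order in gcpriv.h of this era as given (the comments are editorial, decoding the copied row):

    // {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0} decodes as:
    struct static_data
    {
        size_t   min_size;                    // 3 MB initial allocation budget
        size_t   max_size;                    // SSIZE_T_MAX: effectively unbounded
        size_t   fragmentation_limit;         // 0: no absolute fragmentation trigger
        float    fragmentation_burden_limit;  // 0.0f: no relative fragmentation trigger
        float    limit;                       // 1.25f: budget growth factor
        float    max_limit;                   // 4.5f: cap on budget growth
        uint64_t time_clock;                  // 0: no time-based collection trigger
        size_t   gc_clock;                    // 0: no GC-count-based trigger
    };
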
@@ -2758,6 +2764,8 @@ size_t      gc_heap::bgc_overflow_count = 0;
 
 size_t      gc_heap::bgc_begin_loh_size = 0;
 size_t      gc_heap::end_loh_size = 0;
+size_t      gc_heap::bgc_begin_poh_size = 0;
+size_t      gc_heap::end_poh_size = 0;
 
 #ifdef BGC_SERVO_TUNING
 uint64_t    gc_heap::loh_a_no_bgc = 0;
@@ -2773,6 +2781,8 @@ uint32_t    gc_heap::bgc_alloc_spin_uoh = 0;
 
 size_t      gc_heap::bgc_loh_size_increased = 0;
 
+size_t      gc_heap::bgc_poh_size_increased = 0;
+
 size_t      gc_heap::background_soh_alloc_count = 0;
 
 size_t      gc_heap::background_uoh_alloc_count = 0;
@@ -2957,6 +2967,7 @@ BOOL        gc_heap::heap_analyze_enabled = FALSE;
 
 alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST-1];
 alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST-1];
+alloc_list gc_heap::poh_alloc_list [NUM_POH_ALIST-1];
 
 dynamic_data gc_heap::dynamic_data_table [total_generation_count];
 gc_history_per_heap gc_heap::gc_data_per_heap;
@@ -3002,6 +3013,7 @@ CFinalize*  gc_heap::finalize_queue = 0;
 VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_soh;
 VOLATILE(bool) gc_heap::card_mark_done_soh;
 VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_loh;
+VOLATILE(uint32_t) gc_heap::card_mark_chunk_index_poh;
 VOLATILE(bool) gc_heap::card_mark_done_uoh;
 #endif // FEATURE_CARD_MARKING_STEALING
 
@@ -4332,13 +4344,16 @@ typedef struct
     imemory_data *initial_memory;
     imemory_data *initial_normal_heap; // points into initial_memory_array
     imemory_data *initial_large_heap;  // points into initial_memory_array
+    imemory_data *initial_pinned_heap; // points into initial_memory_array
 
     size_t block_size_normal;
     size_t block_size_large;
+    size_t block_size_pinned;
 
     int block_count;                // # of blocks in each
     int current_block_normal;
     int current_block_large;
+    int current_block_pinned;
 
     enum 
     { 
@@ -4355,6 +4370,7 @@ typedef struct
         {
             case 0: return block_size_normal;
             case 1: return block_size_large;
+            case 2: return block_size_pinned;
             default: __UNREACHABLE();
         }
     };
@@ -4367,6 +4383,7 @@ typedef struct
             case soh_gen1: 
             case soh_gen2: return initial_normal_heap[h_number].memory_base;
             case loh_generation: return initial_large_heap[h_number].memory_base;
+            case poh_generation: return initial_pinned_heap[h_number].memory_base;
             default: __UNREACHABLE();
         }
     };
@@ -4379,6 +4396,7 @@ typedef struct
             case soh_gen1: 
             case soh_gen2: return block_size_normal;
             case loh_generation: return block_size_large;
+            case poh_generation: return block_size_pinned;
             default: __UNREACHABLE();
         }
     };
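
The three accessors above assume one flat imemory_data array laid out as all normal blocks, then all large blocks, then all pinned blocks, one block per heap; the widened loop in destroy_initial_memory below relies on the same layout. A minimal sketch of that index arithmetic, assuming the switch expression in block_size (not visible in this hunk) divides the flat index by block_count:

    // Flat layout with N = block_count heaps:
    //   index 0..N-1   -> normal (soh) blocks
    //   index N..2N-1  -> large  (loh) blocks
    //   index 2N..3N-1 -> pinned (poh) blocks
    size_t block_size_for_index (int index, int block_count,
                                 size_t normal, size_t large, size_t pinned)
    {
        switch (index / block_count)  // 0, 1, or 2
        {
            case 0:  return normal;
            case 1:  return large;
            case 2:  return pinned;
            default: return 0;        // unreachable for valid indices
        }
    }
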
@@ -4387,15 +4405,15 @@ typedef struct
 
 initial_memory_details memory_details;
 
-BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heaps, bool use_large_pages_p)
+BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p)
 {
     BOOL reserve_success = FALSE;
 
     // should only be called once
     assert (memory_details.initial_memory == 0);
 
-    // soh + loh segments * num_heaps  
-    memory_details.initial_memory = new (nothrow) imemory_data[num_heaps * (total_generation_count - ephemeral_generation_count)];
+    // soh + loh + poh segments * num_heaps
+    memory_details.initial_memory = new (nothrow) imemory_data[num_heaps * (total_generation_count - ephemeral_generation_count)]; 
     if (memory_details.initial_memory == 0)
     {
         dprintf (2, ("failed to reserve %Id bytes for imemory_data", num_heaps * (total_generation_count - ephemeral_generation_count) * sizeof (imemory_data)));
@@ -4404,13 +4422,16 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
 
     memory_details.initial_normal_heap = memory_details.initial_memory;
     memory_details.initial_large_heap = memory_details.initial_normal_heap + num_heaps;
+    memory_details.initial_pinned_heap = memory_details.initial_large_heap + num_heaps;
     memory_details.block_size_normal = normal_size;
     memory_details.block_size_large = large_size;
+    memory_details.block_size_pinned = pinned_size;
 
     memory_details.block_count = num_heaps;
 
     memory_details.current_block_normal = 0;
     memory_details.current_block_large = 0;
+    memory_details.current_block_pinned = 0;
 
     g_gc_lowest_address = MAX_PTR;
     g_gc_highest_address = 0;
@@ -4422,13 +4443,13 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
         return FALSE;
     }
 
-    if (((size_t)MAX_PTR / memory_details.block_count) < (normal_size + large_size))
+    if (((size_t)MAX_PTR / memory_details.block_count) < (normal_size + large_size + pinned_size))
     {
         dprintf (2, ("(0x%Ix + 0x%Ix)*0x%Ix overflow", normal_size, large_size, memory_details.block_count));
         return FALSE;
     }
 
-    size_t requestedMemory = memory_details.block_count * (normal_size + large_size);
+    size_t requestedMemory = memory_details.block_count * (normal_size + large_size + pinned_size);
 
     uint8_t* allatonce_block = (uint8_t*)virtual_alloc (requestedMemory, use_large_pages_p);
     if (allatonce_block)
@@ -4443,27 +4464,32 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
                                                              (i * normal_size);
             memory_details.initial_large_heap[i].memory_base = allatonce_block +
                 (memory_details.block_count * normal_size) + (i * large_size);
+            memory_details.initial_pinned_heap[i].memory_base = allatonce_block +
+                (memory_details.block_count * (normal_size + large_size)) + (i * pinned_size);
 
             reserve_success = TRUE;
         }
     }
     else
     {
-        // try to allocate 2 blocks
+        // try to allocate 3 blocks
         uint8_t* b1 = (uint8_t*)virtual_alloc (memory_details.block_count * normal_size, use_large_pages_p);
         uint8_t* b2 = (uint8_t*)virtual_alloc (memory_details.block_count * large_size, use_large_pages_p);
+        uint8_t* b3 = (uint8_t*)virtual_alloc (memory_details.block_count * pinned_size, use_large_pages_p);
 
-        if (b1 && b2)
+        if (b1 && b2 && b3)
         {
             memory_details.allocation_pattern = initial_memory_details::EACH_GENERATION;
-            g_gc_lowest_address = min (b1, b2);
+            g_gc_lowest_address = min (b1, min(b2, b3));
             g_gc_highest_address = max (b1 + memory_details.block_count * normal_size,
-                                        b2 + memory_details.block_count * large_size);
+                                   max (b2 + memory_details.block_count * large_size,
+                                        b3 + memory_details.block_count * pinned_size));
 
             for (int i = 0; i < memory_details.block_count; i++)
             {
                 memory_details.initial_normal_heap[i].memory_base = b1 + (i * normal_size);
                 memory_details.initial_large_heap[i].memory_base = b2 + (i * large_size);
+                memory_details.initial_pinned_heap[i].memory_base = b3 + (i * pinned_size);
             }
 
             reserve_success = TRUE;
@@ -4476,6 +4502,8 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, int num_heap
                 virtual_free (b1, memory_details.block_count * normal_size);
             if (b2)
                 virtual_free (b2, memory_details.block_count * large_size);
+            if (b3)
+                virtual_free (b3, memory_details.block_count * pinned_size);
         }
 
         if ((b2 == NULL) && (memory_details.block_count > 1))
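
When the single all-at-once reservation succeeds, the three regions are carved out of one contiguous range: all soh blocks first, then all loh blocks, then all poh blocks. A standalone restatement of the base-address arithmetic from the hunk above (the names here are illustrative, not the GC's):

    #include <cstddef>
    #include <cstdint>

    struct heap_bases { uint8_t* soh; uint8_t* loh; uint8_t* poh; };

    // Base addresses for heap i inside one contiguous reservation of
    // n_heaps * (normal_size + large_size + pinned_size) bytes.
    heap_bases bases_for_heap (uint8_t* block, int n_heaps, int i,
                               size_t normal_size, size_t large_size, size_t pinned_size)
    {
        heap_bases b;
        b.soh = block + i * normal_size;
        b.loh = block + n_heaps * normal_size + i * large_size;
        b.poh = block + n_heaps * (normal_size + large_size) + i * pinned_size;
        return b;
    }
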
@@ -4533,12 +4561,15 @@ void destroy_initial_memory()
 
             virtual_free (memory_details.initial_large_heap[0].memory_base,
                 memory_details.block_count*memory_details.block_size_large);
+
+            virtual_free (memory_details.initial_pinned_heap[0].memory_base,
+                memory_details.block_count*memory_details.block_size_pinned);
        }
         else
         {
             assert (memory_details.allocation_pattern == initial_memory_details::EACH_BLOCK);
             imemory_data *current_block = memory_details.initial_memory;
-            for(int i = 0; i < (memory_details.block_count*2); i++, current_block++)
+            for (int i = 0; i < (memory_details.block_count*(total_generation_count - ephemeral_generation_count)); i++, current_block++)
             {
                 size_t block_size = memory_details.block_size (i);
                 if (current_block->memory_base != NULL)
@@ -4552,6 +4583,7 @@ void destroy_initial_memory()
         memory_details.initial_memory = NULL;
         memory_details.initial_normal_heap = NULL;
         memory_details.initial_large_heap = NULL;
+        memory_details.initial_pinned_heap = NULL;
     }
 }
 
@@ -5012,7 +5044,9 @@ heap_segment* gc_heap::get_segment_for_uoh (int gen_number, size_t size
 #ifdef MULTIPLE_HEAPS
         heap_segment_heap (res) = hp;
 #endif //MULTIPLE_HEAPS
-        res->flags |= heap_segment_flags_loh;
+        res->flags |= gen_number == poh_generation ?
+                                        heap_segment_flags_poh :
+                                        heap_segment_flags_loh;
 
         FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(res), (size_t)(heap_segment_reserved (res) - heap_segment_mem(res)), gc_etw_segment_large_object_heap);
 
@@ -6420,7 +6454,7 @@ void gc_heap::fix_uoh_allocation_area (BOOL for_gc_p)
 {
     UNREFERENCED_PARAMETER(for_gc_p);
 
-    for(int i = uoh_start_generation; i < total_generation_count; i++)
+    for (int i = uoh_start_generation; i < total_generation_count; i++)
     {
 #ifdef _DEBUG
         alloc_context* acontext =
@@ -8201,7 +8235,7 @@ void gc_heap::copy_brick_card_range (uint8_t* la, uint32_t* old_card_table,
 
         // We don't need to go through all the card tables here because
         // we only need to copy from the GC version of the mark array - when we
-        // mark (even in allocate_large_object) we always use that mark array.
+        // mark (even in allocate_uoh_object) we always use that mark array.
         if ((card_table_highest_address (old_ct) >= start) &&
             (card_table_lowest_address (old_ct) <= end))
         {
@@ -10374,7 +10408,8 @@ size_t gc_heap::get_segment_size_hard_limit (uint32_t* num_heaps, bool should_ad
 }
 
 HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
-                                size_t loh_segment_size
+                                size_t loh_segment_size,
+                                size_t poh_segment_size
 #ifdef MULTIPLE_HEAPS
                                 ,int number_of_heaps
 #endif //MULTIPLE_HEAPS
@@ -10494,7 +10529,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
 #endif //BACKGROUND_GC
 
     reserved_memory = 0;
-    size_t initial_heap_size = soh_segment_size + loh_segment_size;
+    size_t initial_heap_size = soh_segment_size + loh_segment_size + poh_segment_size;
 #ifdef MULTIPLE_HEAPS
     reserved_memory_limit = initial_heap_size * number_of_heaps;
 #else //MULTIPLE_HEAPS
@@ -10507,7 +10542,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size,
         check_commit_cs.Initialize();
     }
 
-    if (!reserve_initial_memory (soh_segment_size, loh_segment_size, number_of_heaps, use_large_pages_p))
+    if (!reserve_initial_memory (soh_segment_size, loh_segment_size, poh_segment_size, number_of_heaps, use_large_pages_p))
         return E_OUTOFMEMORY;
 
 #ifdef CARD_BUNDLE
@@ -11173,7 +11208,7 @@ gc_heap::init_gc_heap (int  h_number)
 #endif //!SEG_MAPPING_TABLE
 
 
-    // Create segments for the large generation
+    // Create segments for the large and pinned generations
     heap_segment* lseg = make_initial_segment(loh_generation, h_number);
     if (!lseg)
         return 0;
@@ -11184,19 +11219,36 @@ gc_heap::init_gc_heap (int  h_number)
                               (size_t)(heap_segment_reserved (lseg) - heap_segment_mem(lseg)),
                               gc_etw_segment_large_object_heap);
 
+    heap_segment* pseg = make_initial_segment(poh_generation, h_number);
+    if (!pseg)
+        return 0;
+
+    pseg->flags |= heap_segment_flags_poh;
+
+    FIRE_EVENT(GCCreateSegment_V1, heap_segment_mem(pseg),
+                              (size_t)(heap_segment_reserved (pseg) - heap_segment_mem(pseg)),
+                              gc_etw_segment_pinned_object_heap);
+
 #ifdef SEG_MAPPING_TABLE
     seg_mapping_table_add_segment (lseg, __this);
+    seg_mapping_table_add_segment (pseg, __this);
 #else //SEG_MAPPING_TABLE
     seg_table->insert ((uint8_t*)lseg, sdelta);
+    seg_table->insert ((uint8_t*)pseg, sdelta);
 #endif //SEG_MAPPING_TABLE
 
     make_generation (loh_generation, lseg, heap_segment_mem (lseg), 0);
+    make_generation (poh_generation, pseg, heap_segment_mem (pseg), 0);
 
     heap_segment_allocated (lseg) = heap_segment_mem (lseg) + Align (min_obj_size, get_alignment_constant (FALSE));
     heap_segment_used (lseg) = heap_segment_allocated (lseg) - plug_skew;
 
+    heap_segment_allocated (pseg) = heap_segment_mem (pseg) + Align (min_obj_size, get_alignment_constant (FALSE));
+    heap_segment_used (pseg) = heap_segment_allocated (pseg) - plug_skew;
+
     generation_of (max_generation)->free_list_allocator = allocator(NUM_GEN2_ALIST, BASE_GEN2_ALIST, gen2_alloc_list);
     generation_of (loh_generation)->free_list_allocator = allocator(NUM_LOH_ALIST, BASE_LOH_ALIST, loh_alloc_list);
+    generation_of (poh_generation)->free_list_allocator = allocator(NUM_POH_ALIST, BASE_POH_ALIST, poh_alloc_list);
 
     for (int gen_num = 0; gen_num < total_generation_count; gen_num++)
     {
@@ -11206,10 +11258,12 @@ gc_heap::init_gc_heap (int  h_number)
 
 #ifdef MULTIPLE_HEAPS
     heap_segment_heap (lseg) = this;
+    heap_segment_heap (pseg) = this;
 
     //initialize the alloc context heap
     generation_alloc_context (generation_of (soh_gen0))->set_alloc_heap(vm_heap);
     generation_alloc_context (generation_of (loh_generation))->set_alloc_heap(vm_heap);
+    generation_alloc_context (generation_of (poh_generation))->set_alloc_heap(vm_heap);
 
 #endif //MULTIPLE_HEAPS
 
@@ -11379,6 +11433,7 @@ gc_heap::init_gc_heap (int  h_number)
     background_uoh_alloc_count = 0;
     bgc_overflow_count = 0;
     end_loh_size = dd_min_size (dynamic_data_of (loh_generation));
+    end_poh_size = dd_min_size (dynamic_data_of (poh_generation));
 #endif //BACKGROUND_GC
 
 #ifdef GC_CONFIG_DRIVEN
@@ -13438,6 +13493,15 @@ int gc_heap::bgc_loh_allocate_spin()
     return bgc_allocate_spin(min_gc_size, bgc_begin_size, bgc_size_increased, end_size);
 }
 
+int gc_heap::bgc_poh_allocate_spin()
+{
+    size_t min_gc_size = dd_min_size (dynamic_data_of (poh_generation));
+    size_t bgc_begin_size = bgc_begin_poh_size;
+    size_t bgc_size_increased = bgc_poh_size_increased;
+    size_t end_size = end_poh_size;
+
+    return bgc_allocate_spin(min_gc_size, bgc_begin_size, bgc_size_increased, end_size);
+}
 #endif //BACKGROUND_GC
 
 size_t gc_heap::get_uoh_seg_size (size_t size)
@@ -13693,7 +13757,9 @@ allocation_state gc_heap::allocate_uoh (int gen_number,
             }
 #endif //BGC_SERVO_TUNING
 
-            int spin_for_allocation = bgc_loh_allocate_spin();
+            int spin_for_allocation = (gen_number == loh_generation) ?
+                bgc_loh_allocate_spin() :
+                bgc_poh_allocate_spin();
 
             if (spin_for_allocation >= 0)
             {
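
allocate_uoh now picks the spin helper by generation; both helpers hand generation-specific counters to the shared bgc_allocate_spin, and the caller treats a non-negative result as a request to back off before retrying. The sketch below illustrates only that contract, under assumed policy details; it is not the real bgc_allocate_spin body:

    #include <algorithm>
    #include <cstddef>

    // Illustrative throttle: -1 means allocate immediately; >= 0 means pause
    // before retrying so the in-flight background GC can catch up.
    int example_bgc_allocate_spin (size_t min_gc_size, size_t bgc_begin_size,
                                   size_t bgc_size_increased, size_t end_size)
    {
        // Assumed policy: no throttling while the heap stays within a small
        // multiple of its tuned minimum / last-known end-of-BGC size.
        size_t budget = std::max (end_size, 4 * min_gc_size);
        if (bgc_begin_size + bgc_size_increased < budget)
            return -1;

        // Otherwise back off in proportion to growth during this BGC.
        size_t unit = std::max (bgc_begin_size / 16, (size_t)1);
        return (int)std::min (bgc_size_increased / unit, (size_t)64);
    }
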
@@ -16408,7 +16474,8 @@ int gc_heap::generation_to_condemn (int n_initial,
             for (int i = 0; i < n_heaps; i++)
             {
                 if (((g_heaps[i]->current_generation_size (max_generation)) > bgc_min_per_heap) || 
-                    ((g_heaps[i]->current_generation_size (loh_generation)) > bgc_min_per_heap))
+                    ((g_heaps[i]->current_generation_size (loh_generation)) > bgc_min_per_heap) || 
+                    ((g_heaps[i]->current_generation_size (poh_generation)) > bgc_min_per_heap))
                 {
                     bgc_heap_too_small = FALSE;
                     break;
@@ -16416,7 +16483,8 @@ int gc_heap::generation_to_condemn (int n_initial,
             }
 #else //MULTIPLE_HEAPS
             if ((current_generation_size (max_generation) > bgc_min_per_heap) || 
-                (current_generation_size (loh_generation) > bgc_min_per_heap))
+                (current_generation_size (loh_generation) > bgc_min_per_heap) || 
+                (current_generation_size (poh_generation) > bgc_min_per_heap))
             {
                 bgc_heap_too_small = FALSE;
             }
@@ -17647,6 +17715,7 @@ void gc_heap::update_collection_counts ()
         if (i == max_generation)
         {
             dd_collection_count (dynamic_data_of (loh_generation))++;
+            dd_collection_count (dynamic_data_of (poh_generation))++;
         }
 
         dd_gc_clock (dd) = dd_gc_clock (dd0);
@@ -21146,9 +21215,12 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
 #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
             {
                 dprintf (3, ("Marking cross generation pointers for uoh objects on heap %d", heap_number));
-                for(int i = uoh_start_generation; i < total_generation_count; i++)
+                for (int i = uoh_start_generation; i < total_generation_count; i++)
                 {
-                    mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
+#ifndef ALLOW_REFERENCES_IN_POH
+                    if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                        mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
                 }
 
 #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
@@ -21172,9 +21244,12 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
                 if (!hp->card_mark_done_uoh)
                 {
                     dprintf(3, ("Marking cross generation pointers for large objects on heap %d", hp->heap_number));
-                    for(int i = uoh_start_generation; i < total_generation_count; i++)
+                    for (int i = uoh_start_generation; i < total_generation_count; i++)
                     {
-                        hp->mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
+#ifndef ALLOW_REFERENCES_IN_POH
+                        if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                            hp->mark_through_cards_for_uoh_objects(mark_object_fn, i, FALSE THIS_ARG);
                     }
 
                     hp->card_mark_done_uoh = true;
@@ -23921,6 +23996,8 @@ void gc_heap::plan_phase (int condemned_gen_number)
             GCToEEInterface::DiagWalkLOHSurvivors(__this);
             sweep_uoh_objects (loh_generation);
         }
+
+        sweep_uoh_objects (poh_generation);
     }
     else
     {
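
A compacting plan never applies to the POH: pinned objects cannot move, so the heap is unconditionally swept and the gaps between survivors become free-list space for later allocations. A simplified sketch of in-place sweeping under that model (a flat object walk with caller-supplied helpers; the real sweep_uoh_objects also handles segments, background mark state, and free-object headers):

    #include <cstddef>
    #include <cstdint>

    template <typename IsMarked, typename NextObj, typename AddFree>
    void sweep_in_place (uint8_t* start, uint8_t* end,
                         IsMarked is_marked, NextObj next_obj, AddFree add_free)
    {
        uint8_t* free_start = nullptr;
        for (uint8_t* o = start; o < end; o = next_obj (o))
        {
            if (is_marked (o))
            {
                if (free_start)   // a dead run ends at this survivor
                {
                    add_free (free_start, (size_t)(o - free_start));
                    free_start = nullptr;
                }
            }
            else if (!free_start)
            {
                free_start = o;   // a dead run begins
            }
        }
        if (free_start)           // trailing dead run
            add_free (free_start, (size_t)(end - free_start));
    }
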
@@ -25894,8 +25971,14 @@ void gc_heap::relocate_phase (int condemned_gen_number,
         if (!card_mark_done_uoh)
 #endif // MULTIPLE_HEAPS && FEATURE_CARD_MARKING_STEALING
         {
-            dprintf (3, ("Relocating cross generation pointers for large objects on heap %d", heap_number));
-            mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, loh_generation, TRUE THIS_ARG);
+            dprintf (3, ("Relocating cross generation pointers for uoh objects on heap %d", heap_number));
+            for (int i = uoh_start_generation; i < total_generation_count; i++)
+            {
+#ifndef ALLOW_REFERENCES_IN_POH
+                if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                    mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, i, TRUE THIS_ARG);
+            }
 
 #if defined(MULTIPLE_HEAPS) && defined(FEATURE_CARD_MARKING_STEALING)
             card_mark_done_uoh = true;
@@ -25915,6 +25998,10 @@ void gc_heap::relocate_phase (int condemned_gen_number,
         {
             relocate_in_uoh_objects (loh_generation);
         }
+
+#ifdef ALLOW_REFERENCES_IN_POH
+        relocate_in_uoh_objects (poh_generation);
+#endif //ALLOW_REFERENCES_IN_POH
     }
 #ifndef FEATURE_CARD_MARKING_STEALING
     // moved this code *before* we scan the older generations via mark_through_cards_xxx
@@ -25958,8 +26045,14 @@ void gc_heap::relocate_phase (int condemned_gen_number,
 
             if (!hp->card_mark_done_uoh)
             {
-                dprintf(3, ("Relocating cross generation pointers for large objects on heap %d", hp->heap_number));
-                hp->mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, loh_generation, TRUE THIS_ARG);
+                dprintf(3, ("Relocating cross generation pointers for uoh objects on heap %d", hp->heap_number));
+                for (int i = uoh_start_generation; i < total_generation_count; i++)
+                {
+#ifndef ALLOW_REFERENCES_IN_POH
+                    if (i != poh_generation)
+#endif //ALLOW_REFERENCES_IN_POH
+                        hp->mark_through_cards_for_uoh_objects(&gc_heap::relocate_address, i, TRUE THIS_ARG);
+                }
                 hp->card_mark_done_uoh = true;
             }
         }
@@ -27166,10 +27259,13 @@ void gc_heap::background_mark_phase ()
 
     size_t total_soh_size = generation_sizes (generation_of (max_generation));
     size_t total_loh_size = generation_size (loh_generation);
+    size_t total_poh_size = generation_size (poh_generation);
     bgc_begin_loh_size = total_loh_size;
+    bgc_begin_poh_size = total_poh_size;
     bgc_loh_size_increased = 0;
+    bgc_poh_size_increased = 0;
 
-    dprintf (GTC_LOG, ("BM: h%d: loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size));
+    dprintf (GTC_LOG, ("BM: h%d: loh: %Id, soh: %Id, poh: %Id", heap_number, total_loh_size, total_soh_size, total_poh_size));
 
     {
         //concurrent_print_time_delta ("copying stack roots");
@@ -27446,8 +27542,9 @@ void gc_heap::background_mark_phase ()
 
         total_soh_size = generation_sizes (generation_of (max_generation));
         total_loh_size = generation_size (loh_generation);
+        total_poh_size = generation_size (poh_generation);
 
-        dprintf (GTC_LOG, ("FM: h%d: loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size));
+        dprintf (GTC_LOG, ("FM: h%d: loh: %Id, soh: %Id, poh: %Id", heap_number, total_loh_size, total_soh_size, total_poh_size));
 
         dprintf (2, ("nonconcurrent marking stack roots"));
         GCScan::GcScanRoots(background_promote,
@@ -27591,8 +27688,9 @@ void gc_heap::background_mark_phase ()
 
     gen0_bricks_cleared = FALSE;
 
-    dprintf (2, ("end of bgc mark: loh: %d, soh: %d", 
+    dprintf (2, ("end of bgc mark: loh: %d, poh: %d, soh: %d", 
                  generation_size (loh_generation), 
+                 generation_size (poh_generation), 
                  generation_sizes (generation_of (max_generation))));
 
     for (int gen_idx = max_generation; gen_idx < total_generation_count; gen_idx++)
@@ -27903,7 +28001,7 @@ void gc_heap::revisit_written_pages (BOOL concurrent_p, BOOL reset_only_p)
     bool reset_watch_state = !!concurrent_p;
     bool is_runtime_suspended = !concurrent_p;
     BOOL small_object_segments = TRUE;
-    for(int i = max_generation; i < total_generation_count; i++)
+    for (int i = max_generation; i < total_generation_count; i++)
     {
         heap_segment* seg = heap_segment_rw (generation_start_segment (generation_of (i)));
         PREFIX_ASSUME(seg != NULL);
@@ -32798,6 +32896,10 @@ void gc_heap::compute_new_dynamic_data (int gen_number)
 #ifdef BACKGROUND_GC
             if (i == loh_generation)
                 end_loh_size = total_gen_size;
+
+            if (i == poh_generation)
+                end_poh_size = total_gen_size;
+
 #endif //BACKGROUND_GC
             //update counter
             dd_promoted_size (dd) = out;
@@ -33370,7 +33472,7 @@ BOOL gc_heap::ephemeral_gen_fit_p (gc_tuning_point tp)
     }
 }
 
-CObjectHeader* gc_heap::allocate_large_object (size_t jsize, uint32_t flags, int64_t& alloc_bytes)
+CObjectHeader* gc_heap::allocate_uoh_object (size_t jsize, uint32_t flags, int gen_number, int64_t& alloc_bytes)
 {
     //create a new alloc context because gen3context is shared.
     alloc_context acontext;
@@ -33393,17 +33495,19 @@ CObjectHeader* gc_heap::allocate_large_object (size_t jsize, uint32_t flags, int
 
     size_t size = AlignQword (jsize);
     int align_const = get_alignment_constant (FALSE);
-#ifdef FEATURE_LOH_COMPACTION
-    size_t pad = Align (loh_padding_obj_size, align_const);
-#else
     size_t pad = 0;
+#ifdef FEATURE_LOH_COMPACTION
+    if (gen_number == loh_generation)
+    {
+        pad = Align (loh_padding_obj_size, align_const);
+    }
 #endif //FEATURE_LOH_COMPACTION
 
     assert (size >= Align (min_obj_size, align_const));
 #ifdef _MSC_VER
 #pragma inline_depth(0)
 #endif //_MSC_VER
-    if (! allocate_more_space (&acontext, (size + pad), flags, loh_generation))
+    if (! allocate_more_space (&acontext, (size + pad), flags, gen_number))
     {
         return 0;
     }
@@ -33447,8 +33551,6 @@ CObjectHeader* gc_heap::allocate_large_object (size_t jsize, uint32_t flags, int
             mark_array_clear_marked (result);
         }
 #ifdef BACKGROUND_GC
-        //the object has to cover one full mark uint32_t
-        assert (size >= mark_word_size);
         if (current_c_gc_state != c_gc_state_free)
         {
             dprintf (3, ("Concurrent allocation of a large object %Ix",
@@ -34295,8 +34397,9 @@ void gc_heap::background_sweep()
 
     size_t total_soh_size = generation_sizes (generation_of (max_generation));
     size_t total_loh_size = generation_size (loh_generation);
+    size_t total_poh_size = generation_size (poh_generation);
 
-    dprintf (GTC_LOG, ("loh: %Id, soh: %Id", heap_number, total_loh_size, total_soh_size));
+    dprintf (GTC_LOG, ("h%d: S: poh: %Id, loh: %Id, soh: %Id", heap_number, total_poh_size, total_loh_size, total_soh_size));
 
     dprintf (GTC_LOG, ("end of bgc sweep: gen2 FL: %Id, FO: %Id",
         generation_free_list_space (generation_of (max_generation)),
@@ -34307,6 +34410,11 @@ void gc_heap::background_sweep()
         generation_free_list_space (generation_of (loh_generation)),
         generation_free_obj_space (generation_of (loh_generation))));
 
+    dprintf (GTC_LOG, ("h%d: end of bgc sweep: poh FL: %Id, FO: %Id", 
+        heap_number,
+        generation_free_list_space (generation_of (poh_generation)),
+        generation_free_obj_space (generation_of (poh_generation))));
+
     FIRE_EVENT(BGC2ndConEnd);
     concurrent_print_time_delta ("background sweep");
 
@@ -34556,7 +34664,9 @@ void gc_heap::mark_through_cards_for_uoh_objects (card_fn fn,
     size_t total_cards_cleared = 0;
 
 #ifdef FEATURE_CARD_MARKING_STEALING
-    VOLATILE(uint32_t)* chunk_index = (VOLATILE(uint32_t)*) & card_mark_chunk_index_loh;
+    VOLATILE(uint32_t)* chunk_index = (VOLATILE(uint32_t)*) &(gen_num == loh_generation ? 
+        card_mark_chunk_index_loh : 
+        card_mark_chunk_index_poh);
 
     card_marking_enumerator card_mark_enumerator(seg, low, chunk_index);
     card_word_end = 0;
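
The chunk_index initialization above leans on a C++ rule worth spelling out: a conditional expression whose operands are lvalues of the same type is itself an lvalue, so taking its address yields the address of whichever variable was selected. A self-contained demonstration:

    #include <cassert>
    #include <cstdint>

    int main ()
    {
        uint32_t loh_index = 1, poh_index = 2;
        bool is_loh = false;
        // ?: over two same-typed lvalues is an lvalue; & picks the chosen object.
        uint32_t* chunk_index = &(is_loh ? loh_index : poh_index);
        assert (chunk_index == &poh_index);
        *chunk_index = 42;            // writes through to poh_index
        assert (poh_index == 42);
        return 0;
    }
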
@@ -35451,7 +35561,7 @@ void gc_heap::verify_partial ()
     BOOL bad_ref_p = FALSE;
     BOOL free_ref_p = FALSE;
 
-    for(int i = max_generation; i < total_generation_count; i++)
+    for (int i = max_generation; i < total_generation_count; i++)
     {
         generation* gen = generation_of (i);
         int align_const = get_alignment_constant (i == max_generation);
@@ -36350,9 +36460,11 @@ HRESULT GCHeap::Initialize()
 
 #ifdef MULTIPLE_HEAPS
     gc_heap::n_heaps = nhp;
-    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, nhp);
+    // TODO: tuning https://github.com/dotnet/runtime/issues/13739
+    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, large_seg_size /*poh_segment_size*/, nhp);
 #else
-    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/);
+    // TODO: tuning https://github.com/dotnet/runtime/issues/13739
+    hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, large_seg_size /*poh_segment_size*/);
 #endif //MULTIPLE_HEAPS
 
     if (hr != S_OK)
@@ -36411,7 +36523,7 @@ HRESULT GCHeap::Initialize()
 #ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way
 #if defined (STRESS_HEAP) && !defined (MULTIPLE_HEAPS)
     if (GCStress<cfg_any>::IsEnabled())  {
-        for(int i = 0; i < GCHeap::NUM_HEAP_STRESS_OBJS; i++)
+        for (int i = 0; i < GCHeap::NUM_HEAP_STRESS_OBJS; i++)
         {
             m_StressObjs[i] = CreateGlobalHandle(0);
         }
@@ -37157,7 +37269,7 @@ GCHeap::AllocAlign8Common(void* _hp, alloc_context* acontext, size_t size, uint3
 
         alloc_context* acontext = generation_alloc_context (hp->generation_of (loh_generation));
 
-        newAlloc = (Object*) hp->allocate_large_object (size, flags, acontext->alloc_bytes_uoh);
+        newAlloc = (Object*) hp->allocate_uoh_object (size, flags, loh_generation, acontext->alloc_bytes_uoh);
         ASSERT(((size_t)newAlloc & 7) == 0);
     }
 
@@ -37202,7 +37314,7 @@ GCHeap::AllocLHeap( size_t size, uint32_t flags REQD_ALIGN_DCL)
 #endif //MULTIPLE_HEAPS
 
     alloc_context* acontext = generation_alloc_context (hp->generation_of (loh_generation));
-    newAlloc = (Object*) hp->allocate_large_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, acontext->alloc_bytes_uoh);
+    newAlloc = (Object*) hp->allocate_uoh_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, loh_generation, acontext->alloc_bytes_uoh);
 
 #ifdef FEATURE_STRUCTALIGN
     newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size);
@@ -37258,7 +37370,7 @@ GCHeap::Alloc(gc_alloc_context* context, size_t size, uint32_t flags REQD_ALIGN_
     }
     else
     {
-        newAlloc = (Object*) hp->allocate_large_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, acontext->alloc_bytes_uoh);
+        newAlloc = (Object*) hp->allocate_uoh_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), flags, loh_generation, acontext->alloc_bytes_uoh);
 #ifdef FEATURE_STRUCTALIGN
         newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size);
 #endif // FEATURE_STRUCTALIGN
diff --git a/src/coreclr/src/gc/gc.h b/src/coreclr/src/gc/gc.h
index 2ac1710..51bb270 100644
@@ -89,7 +89,8 @@ enum gc_etw_segment_type
 {
     gc_etw_segment_small_object_heap = 0,
     gc_etw_segment_large_object_heap = 1,
-    gc_etw_segment_read_only_heap = 2
+    gc_etw_segment_read_only_heap = 2,
+    gc_etw_segment_pinned_object_heap = 3
 };
 
 // Types of allocations, emitted by the GCAllocationTick ETW event.
@@ -119,13 +120,16 @@ enum gc_generation_num
     // large object heap, technically not a generation, but it is convenient to represent it as such
     loh_generation = 3,
 
+    // pinned heap, a separate generation for the same reasons as loh
+    poh_generation = 4,
+
     uoh_start_generation = loh_generation,
 
     // number of ephemeral generations 
     ephemeral_generation_count = max_generation,
 
     // number of all generations 
-    total_generation_count = loh_generation + 1
+    total_generation_count = poh_generation + 1
 };
 
 #ifdef GC_CONFIG_DRIVEN
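
Because poh_generation is appended and total_generation_count is derived from it, every loop bounded by total_generation_count (or starting at uoh_start_generation) in gc.cpp picks up the new heap without further edits, which is why so many of the hunks above touch only loop headers. The relationships the enum now guarantees:

    // Implied values: soh_gen0..soh_gen2 = 0..2, loh_generation = 3, poh_generation = 4.
    static_assert (poh_generation == loh_generation + 1, "poh sits right after loh");
    static_assert (uoh_start_generation == loh_generation, "uoh scans start at loh");
    static_assert (total_generation_count == 5, "3 soh generations + loh + poh");
    static_assert (total_generation_count - ephemeral_generation_count == 3,
                   "soh, loh, poh: one reserved region each, as in reserve_initial_memory");
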
diff --git a/src/coreclr/src/gc/gcpriv.h b/src/coreclr/src/gc/gcpriv.h
index 952eb83..4801a19 100644
@@ -106,6 +106,8 @@ inline void FATAL_GC_ERROR()
 
 #define CARD_BUNDLE         //enable card bundle feature.(requires WRITE_WATCH)
 
+#define ALLOW_REFERENCES_IN_POH  //Allow POH objects to contain references.
+
 // If this is defined we use a map for segments in order to find the heap for
 // a segment fast. But it does use more memory as we have to cover the whole
 // heap range and for each entry we allocate a struct of 5 ptr-size words
@@ -1227,7 +1229,8 @@ public:
 
     static
     HRESULT initialize_gc  (size_t soh_segment_size,
-                            size_t loh_segment_size
+                            size_t loh_segment_size,
+                            size_t poh_segment_size
 #ifdef MULTIPLE_HEAPS
                             , int number_of_heaps
 #endif //MULTIPLE_HEAPS
@@ -1269,13 +1272,13 @@ public:
     void gc_thread_stub (void* arg);
 #endif //MULTIPLE_HEAPS
 
-    // For LOH allocations we only update the alloc_bytes_uoh in allocation
+    // For UOH allocations we only update the alloc_bytes_uoh in allocation
     // context - we don't actually use the ptr/limit from it so I am
     // making this explicit by not passing in the alloc_context.
-    // Note: This is an instance method, but the heap instance is only used for
+    // Note: These are instance methods, but the heap instance is only used for
     // lowest_address and highest_address, which are currently the same across all heaps.
     PER_HEAP
-    CObjectHeader* allocate_large_object (size_t size, uint32_t flags, int64_t& alloc_bytes);
+    CObjectHeader* allocate_uoh_object (size_t size, uint32_t flags, int gen_num, int64_t& alloc_bytes);
 
 #ifdef FEATURE_STRUCTALIGN
     PER_HEAP
@@ -1541,6 +1544,9 @@ protected:
 
     PER_HEAP
     BOOL bgc_loh_allocate_spin();
+
+    PER_HEAP
+    BOOL bgc_poh_allocate_spin();
 #endif //BACKGROUND_GC
 
 #define max_saved_spinlock_info 48
@@ -3724,7 +3730,11 @@ protected:
     PER_HEAP
     size_t     bgc_begin_loh_size;
     PER_HEAP
+    size_t     bgc_begin_poh_size;
+    PER_HEAP
     size_t     end_loh_size;
+    PER_HEAP
+    size_t     end_poh_size;
 
 #ifdef BGC_SERVO_TUNING
     PER_HEAP
@@ -3758,6 +3768,8 @@ protected:
     // in free list doesn't increase the heap size.
     PER_HEAP
     size_t     bgc_loh_size_increased;
+    PER_HEAP
+    size_t     bgc_poh_size_increased;
 
     PER_HEAP
     size_t     background_soh_alloc_count;
@@ -3895,6 +3907,7 @@ protected:
 
 #define youngest_generation (generation_of (0))
 #define large_object_generation (generation_of (loh_generation))
+#define pinned_object_generation (generation_of (poh_generation))
 
     // The more_space_lock and gc_lock is used for 3 purposes:
     //
@@ -3982,6 +3995,16 @@ protected:
     PER_HEAP
     alloc_list gen2_alloc_list[NUM_GEN2_ALIST-1];
 
+// TODO: tuning https://github.com/dotnet/runtime/issues/13739
+#define NUM_POH_ALIST (12)
+#ifdef BIT64
+#define BASE_POH_ALIST (1*256)
+#else
+#define BASE_POH_ALIST (1*128)
+#endif // BIT64
+    PER_HEAP
+    alloc_list poh_alloc_list[NUM_POH_ALIST-1];
+
 //------------------------------------------    
 
     PER_HEAP
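
poh_alloc_list plugs into the same bucketed free-list allocator as the gen2 and loh lists (see the allocator(...) wiring in init_gc_heap above): bucket capacities start at BASE_POH_ALIST and double, with the final bucket unbounded. A sketch of the size-to-bucket mapping under that assumption; the authoritative logic lives in the allocator class:

    #include <cstddef>

    // Illustrative mapping for NUM_POH_ALIST = 12, BASE_POH_ALIST = 256 (64-bit).
    int poh_bucket_for_size (size_t size)
    {
        const int num_buckets = 12;   // NUM_POH_ALIST
        size_t limit = 256;           // BASE_POH_ALIST on 64-bit
        for (int i = 0; i < num_buckets - 1; i++)
        {
            if (size < limit)
                return i;             // first bucket whose upper bound exceeds size
            limit *= 2;
        }
        return num_buckets - 1;       // overflow bucket for everything larger
    }
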
@@ -4272,6 +4295,9 @@ public:
     VOLATILE(uint32_t)    card_mark_chunk_index_loh;
 
     PER_HEAP
+    VOLATILE(uint32_t)    card_mark_chunk_index_poh;
+
+    PER_HEAP
     VOLATILE(bool)        card_mark_done_uoh;
 
     PER_HEAP
@@ -4282,6 +4308,7 @@ public:
         card_mark_done_soh = false;
 
         card_mark_chunk_index_loh = ~0;
+        card_mark_chunk_index_poh = ~0;
         card_mark_done_uoh = false;
     }
 
@@ -4772,6 +4799,7 @@ struct loh_padding_obj
 #define heap_segment_flags_ma_pcommitted 128
 #define heap_segment_flags_uoh_delete   256
 
+#define heap_segment_flags_poh          512
 #endif //BACKGROUND_GC
 
 //need to be careful to keep enough pad items to fit a relocation node
@@ -4860,7 +4888,7 @@ BOOL heap_segment_unmappable_p (heap_segment* inst)
 inline
 BOOL heap_segment_uoh_p (heap_segment * inst)
 {
-    return !!(inst->flags & heap_segment_flags_loh);
+    return !!(inst->flags & (heap_segment_flags_loh | heap_segment_flags_poh));
 }
 
 #ifdef BACKGROUND_GC
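
heap_segment_uoh_p now answers "large or pinned" with a single mask; call sites that must tell the two apart (as get_segment_for_uoh does when choosing a flag) would use a narrower predicate in the same style. A hypothetical companion sketch, mirroring the accessor above rather than quoting the commit, and noting that heap_segment_flags_poh is only defined under BACKGROUND_GC in this change:

    #ifdef BACKGROUND_GC
    inline
    BOOL heap_segment_poh_p (heap_segment * inst)
    {
        return !!(inst->flags & heap_segment_flags_poh);
    }
    #endif //BACKGROUND_GC
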