Fixes and improvements: (#87218)
author     Peter Sollich <petersol@microsoft.com>
Mon, 12 Jun 2023 09:38:12 +0000 (11:38 +0200)
committer  GitHub <noreply@github.com>
Mon, 12 Jun 2023 09:38:12 +0000 (11:38 +0200)
- Fix a hang caused by a race condition in change_heap_count.
- Change the way we store the dynamic heap count input metrics so they are easier to surface via ETW events.
- Refactor enter_spin_lock_msl into an inlineable fast path and a slower, more complex out-of-line helper (a sketch of the pattern follows this list).
- Subtract the time spent in safe_switch_to_thread and WaitLongerNoInstru from the msl wait time; this makes the metric much less noisy.
- Add more diagnostic output to check_heap_count and change_heap_count.
- Add more spinning to EnterFinalizeLock to address slow suspensions in some ASP.NET benchmarks.
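
To illustrate the enter_spin_lock_msl split: the fast path is a single interlocked compare-exchange that call sites can inline, and only contended callers pay for a call into the out-of-line helper. A minimal standalone sketch of the pattern, using std::atomic stand-ins rather than the runtime's GCSpinLock and Interlocked wrappers:

    #include <atomic>
    #include <thread>

    // Illustrative stand-ins; not the runtime's types.
    struct SpinLock { std::atomic<int> lock{0}; };   // 0 = lock_free, 1 = lock_taken
    enum msl_status { msl_entered };

    // Out-of-line slow path: wait until the lock looks free, then retry the CAS.
    msl_status enter_slow (SpinLock* msl)
    {
        int expected;
        do
        {
            while (msl->lock.load (std::memory_order_relaxed) != 0)
                std::this_thread::yield ();          // stands in for safe_switch_to_thread
            expected = 0;
        }
        while (!msl->lock.compare_exchange_strong (expected, 1));
        return msl_entered;
    }

    // Inlineable fast path: one CAS and no function call on the uncontended path.
    inline msl_status enter (SpinLock* msl)
    {
        int expected = 0;
        if (msl->lock.compare_exchange_strong (expected, 1))
            return msl_entered;
        return enter_slow (msl);
    }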

src/coreclr/gc/gc.cpp
src/coreclr/gc/gcpriv.h

index 92f6653..28bae97 100644
@@ -1413,11 +1413,9 @@ bool gc_heap::should_move_heap (GCSpinLock* msl)
 
 // All the places where we could be stopped because there was a suspension should call should_move_heap to check if we need to return
 // so we can try another heap or we can continue the allocation on the same heap.
-enter_msl_status gc_heap::enter_spin_lock_msl (GCSpinLock* msl)
+enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl)
 {
-retry:
-
-    if (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free)
+    do
     {
 #ifdef DYNAMIC_HEAP_COUNT
         uint64_t start = GetHighPrecisionTimeStamp();
@@ -1449,7 +1447,13 @@ retry:
                     }
                     if (VolatileLoad (&msl->lock) != lock_free && !IsGCInProgress ())
                     {
+#ifdef DYNAMIC_HEAP_COUNT
+                        start -= GetHighPrecisionTimeStamp();
+#endif //DYNAMIC_HEAP_COUNT
                         safe_switch_to_thread ();
+#ifdef DYNAMIC_HEAP_COUNT
+                        start += GetHighPrecisionTimeStamp();
+#endif //DYNAMIC_HEAP_COUNT
                     }
                 }
                 else
@@ -1459,19 +1463,35 @@ retry:
             }
             else
             {
+#ifdef DYNAMIC_HEAP_COUNT
+                start -= GetHighPrecisionTimeStamp();
+#endif //DYNAMIC_HEAP_COUNT
                 WaitLongerNoInstru (i);
+#ifdef DYNAMIC_HEAP_COUNT
+                start += GetHighPrecisionTimeStamp();
+#endif //DYNAMIC_HEAP_COUNT
             }
         }
 #ifdef DYNAMIC_HEAP_COUNT
         uint64_t end = GetHighPrecisionTimeStamp();
         Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start);
+        dprintf (6666, ("wait for msl lock total time: %zd, total count: %zd, this time: %zd, this count: %u", msl->msl_wait_time, msl->msl_wait_count, end - start, i));
 #endif //DYNAMIC_HEAP_COUNT
-        goto retry;
     }
+    while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free);
 
     return msl_entered;
 }
 
+inline
+enter_msl_status gc_heap::enter_spin_lock_msl (GCSpinLock* msl)
+{
+    if (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) == lock_free)
+        return msl_entered;
+
+    return enter_spin_lock_msl_helper (msl);
+}
+
 //
 // We need the following methods to have volatile arguments, so that they can accept
 // raw pointers in addition to the results of the & operator on Volatile<T>.
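
A note on the timestamp arithmetic in the hunk above: instead of keeping a separate accumulator for yield time, the patch shifts start forward by the duration of each safe_switch_to_thread / WaitLongerNoInstru call, so the end - start computed at the bottom of the loop counts only time actually spent waiting for the lock. A compilable sketch of the same trick, where now is an illustrative stand-in for GetHighPrecisionTimeStamp:

    #include <atomic>
    #include <chrono>
    #include <cstdint>
    #include <thread>

    static uint64_t now ()                  // stand-in for GetHighPrecisionTimeStamp
    {
        using namespace std::chrono;
        return duration_cast<microseconds> (steady_clock::now ().time_since_epoch ()).count ();
    }

    // Each yield adds (t_after - t_before) to 'start', cancelling the same
    // interval out of the final (end - start), so yields don't count as waiting.
    uint64_t wait_excluding_yields (std::atomic<int>& lock)
    {
        uint64_t start = now ();
        int expected = 0;
        while (!lock.compare_exchange_strong (expected, 1))
        {
            expected = 0;
            start -= now ();                // timestamp before yielding
            std::this_thread::yield ();     // time spent here is excluded
            start += now ();                // timestamp after yielding
        }
        uint64_t end = now ();
        return end - start;                 // wait time only, yields subtracted out
    }
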
@@ -24905,7 +24925,8 @@ void gc_heap::check_heap_count ()
     if (heap_number == 0)
     {
         // acquire data for the current sample
-        uint64_t    msl_wait_time = 0;
+        uint64_t    soh_msl_wait_time = 0;
+        uint64_t    uoh_msl_wait_time = 0;
         size_t      allocating_thread_count = 0;
         size_t      heap_size = 0;
         for (int i = 0; i < n_heaps; i++)
@@ -24914,11 +24935,13 @@ void gc_heap::check_heap_count ()
 
             allocating_thread_count += hp->alloc_contexts_used;
 
-            msl_wait_time += hp->more_space_lock_soh.msl_wait_time;
+            soh_msl_wait_time += hp->more_space_lock_soh.msl_wait_time;
             hp->more_space_lock_soh.msl_wait_time = 0;
+            hp->more_space_lock_soh.msl_wait_count = 0;
 
-            msl_wait_time += hp->more_space_lock_uoh.msl_wait_time;
+            uoh_msl_wait_time += hp->more_space_lock_uoh.msl_wait_time;
             hp->more_space_lock_uoh.msl_wait_time = 0;
+            hp->more_space_lock_uoh.msl_wait_count = 0;
 
             for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
             {
@@ -24935,15 +24958,17 @@ void gc_heap::check_heap_count ()
         // persist data for the current sample
         dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index];
 
-        sample.msl_wait_time = msl_wait_time;
+        sample.soh_msl_wait_time = soh_msl_wait_time / n_heaps;
+        sample.uoh_msl_wait_time = uoh_msl_wait_time / n_heaps;
         sample.elapsed_between_gcs = dd_time_clock (hp0_dd0) - dd_previous_time_clock (hp0_dd0);
         sample.gc_elapsed_time = dd_gc_elapsed_time (hp0_dd0);
         sample.allocating_thread_count = allocating_thread_count;
         sample.heap_size = heap_size;
 
-        dprintf (6666, ("sample %d: msl_wait_time: %zd, elapsed_between_gcs: %zd, gc_elapsed_time: %d, heap_size: %zd MB",
+        dprintf (6666, ("sample %d: soh_msl_wait_time: %zd, uoh_msl_wait_time: %zd, elapsed_between_gcs: %zd, gc_elapsed_time: %d, heap_size: %zd MB",
             dynamic_heap_count_data.sample_index,
-            sample.msl_wait_time,
+            sample.soh_msl_wait_time,
+            sample.uoh_msl_wait_time,
             sample.elapsed_between_gcs,
             sample.gc_elapsed_time,
             sample.heap_size/(1024*1024)));
@@ -24969,7 +24994,7 @@ void gc_heap::check_heap_count ()
             for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++)
             {
                 dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i];
-                uint64_t overhead_time = (sample.msl_wait_time / n_heaps) + sample.gc_elapsed_time;
+                uint64_t overhead_time = sample.soh_msl_wait_time + sample.uoh_msl_wait_time + sample.gc_elapsed_time;
                 percent_overhead[i] = overhead_time * 100.0f / sample.elapsed_between_gcs;
                 if (percent_overhead[i] < 0)
                     percent_overhead[i] = 0;
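
To make the overhead formula concrete (note that the per-heap averaging now happens when the sample is recorded, so the formula above no longer divides by n_heaps): with invented numbers, a per-heap average soh wait of 2 ms, a uoh wait of 0.5 ms, a gc that took 5 ms, and 100 ms between gcs yield a 7.5% sample overhead:

    #include <cstdint>

    // Worked example for the percent_overhead computation
    // (invented numbers, all times in microseconds).
    float sample_percent_overhead ()
    {
        uint64_t soh_msl_wait_time   = 2000;     // per-heap average soh msl wait
        uint64_t uoh_msl_wait_time   = 500;      // per-heap average uoh msl wait
        uint64_t gc_elapsed_time     = 5000;     // time the gc took
        uint64_t elapsed_between_gcs = 100000;   // wall time between gcs

        uint64_t overhead_time = soh_msl_wait_time + uoh_msl_wait_time + gc_elapsed_time;
        return overhead_time * 100.0f / elapsed_between_gcs;   // (7500 * 100) / 100000 = 7.5
    }
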
@@ -25075,6 +25100,14 @@ void gc_heap::check_heap_count ()
 
             dynamic_heap_count_data.new_n_heaps = new_n_heaps;
 
+            // store data used for decision to emit in ETW event
+            dynamic_heap_count_data.median_percent_overhead           = median_percent_overhead;
+            dynamic_heap_count_data.percent_heap_space_cost_per_heap  = percent_heap_space_cost_per_heap;
+            dynamic_heap_count_data.overhead_reduction_per_step_up    = overhead_reduction_per_step_up;
+            dynamic_heap_count_data.overhead_increase_per_step_down   = overhead_increase_per_step_down;
+            dynamic_heap_count_data.space_cost_increase_per_step_up   = space_cost_increase_per_step_up;
+            dynamic_heap_count_data.space_cost_decrease_per_step_down = space_cost_decrease_per_step_down;
+
             if (new_n_heaps != n_heaps)
             {
                 // can't have threads allocating while we change the number of heaps
@@ -25091,6 +25124,7 @@ void gc_heap::check_heap_count ()
     else if (settings.gc_index < prev_change_heap_count_gc_index + 3)
     {
         // reconsider the decision every few gcs
+        dprintf (6666, ("checked heap count at %zd, now at %zd", prev_change_heap_count_gc_index, settings.gc_index));
         return;
     }
 
@@ -25107,6 +25141,7 @@ void gc_heap::check_heap_count ()
     if (dynamic_heap_count_data.new_n_heaps == n_heaps)
     {
         // heap count stays the same, no work to do
+        dprintf (6666, ("heap count stays the same, no work to do %d == %d", dynamic_heap_count_data.new_n_heaps, n_heaps));
         return;
     }
 
@@ -25266,8 +25301,12 @@ bool gc_heap::change_heap_count (int new_n_heaps)
         }
     }
 
-    if (dynamic_heap_count_data.new_n_heaps == n_heaps)
+    // gc_heap::n_heaps may have changed by now, compare to the snapshot *before* the join
+    if (dynamic_heap_count_data.new_n_heaps == old_n_heaps)
+    {
+        dprintf (6666, ("failed to change heap count, no work to do %d == %d", dynamic_heap_count_data.new_n_heaps, old_n_heaps));
         return false;
+    }
 
     if (heap_number == 0)
     {
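
For context on the hang fix in the hunk above: by this point heap 0 may already have published the new heap count to gc_heap::n_heaps, so a racing heap that compared new_n_heaps against the live n_heaps could conclude there is nothing to do while the transition is still in progress, and the heaps would disagree and hang. Comparing against old_n_heaps, the snapshot taken before the join, gives every heap the same answer. A minimal sketch of the snapshot-before-join pattern (the names and the join itself are simplified stand-ins):

    #include <atomic>

    std::atomic<int> n_heaps {4};            // shared; heap 0 updates it during the transition

    bool should_proceed (int new_n_heaps)
    {
        int old_n_heaps = n_heaps.load ();   // snapshot *before* the synchronization point
        // ... join with the other heaps; heap 0 may store the new count ...
        // comparing against the live n_heaps here would race with heap 0's store;
        // the snapshot makes all threads reach the same decision
        return new_n_heaps != old_n_heaps;
    }
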
@@ -50749,7 +50788,18 @@ retry:
         unsigned int i = 0;
         while (lock >= 0)
         {
-            YieldProcessor();           // indicate to the processor that we are spinning
+            if (g_num_processors > 1)
+            {
+                int spin_count = 128 * yp_spin_count_unit;
+                for (int j = 0; j < spin_count; j++)
+                {
+                    if (lock < 0)
+                        break;
+                    YieldProcessor ();           // indicate to the processor that we are spinning
+                }
+            }
+            if (lock < 0)
+                break;
             if (++i & 7)
                 GCToOSInterface::YieldThread (0);
             else
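
The EnterFinalizeLock change above is a standard bounded-spin-before-yield scheme: on a multiprocessor machine, burn a limited number of YieldProcessor iterations re-checking the lock cheaply before falling back to yielding the thread. A self-contained sketch of the scheme; the 0/1 lock encoding, the spin budget, and cpu_relax are illustrative (the runtime uses lock < 0 for free, 128 * yp_spin_count_unit for the budget, and YieldProcessor):

    #include <atomic>
    #include <thread>

    // Stand-in for YieldProcessor; real code emits a cpu pause/yield instruction.
    inline void cpu_relax () {}

    void acquire (std::atomic<int>& lock, unsigned num_processors)
    {
        int expected = 0;
        while (!lock.compare_exchange_strong (expected, 1))
        {
            expected = 0;
            if (num_processors > 1)                    // spinning only helps on MP machines
            {
                const int spin_count = 128 * 64;       // illustrative budget
                for (int j = 0; j < spin_count; j++)
                {
                    if (lock.load (std::memory_order_relaxed) == 0)
                        break;                         // looks free: go retry the CAS
                    cpu_relax ();
                }
            }
            if (lock.load (std::memory_order_relaxed) != 0)
                std::this_thread::yield ();            // still held: give up the quantum
        }
    }
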
index 1263610..d970fa9 100644
@@ -2215,6 +2215,8 @@ private:
 
     PER_HEAP_METHOD bool should_move_heap (GCSpinLock* msl);
 
+    PER_HEAP_METHOD enter_msl_status enter_spin_lock_msl_helper (GCSpinLock* msl);
+
     PER_HEAP_METHOD enter_msl_status enter_spin_lock_msl (GCSpinLock* msl);
 
     PER_HEAP_METHOD size_t get_full_compact_gc_count();
@@ -4244,7 +4246,8 @@ private:
         {
             uint64_t    elapsed_between_gcs;    // time between gcs in microseconds
             uint64_t    gc_elapsed_time;        // time the gc took
-            uint64_t    msl_wait_time;          // time the allocator spent waiting for the msl lock
+            uint64_t    soh_msl_wait_time;      // time the allocator spent waiting for the soh msl lock
+            uint64_t    uoh_msl_wait_time;      // time the allocator spent waiting for the uoh msl lock
             size_t      allocating_thread_count;// number of allocating threads
             size_t      heap_size;
         };
@@ -4252,6 +4255,13 @@ private:
         unsigned        sample_index;
         sample          samples[sample_size];
 
+        float median_percent_overhead;          // estimated overhead of allocator + gc
+        float percent_heap_space_cost_per_heap; // percent space cost of adding a heap
+        float overhead_reduction_per_step_up;   // percentage effect on overhead of increasing heap count
+        float overhead_increase_per_step_down;  // percentage effect on overhead of decreasing heap count
+        float space_cost_increase_per_step_up;  // percentage effect on space of increasing heap count
+        float space_cost_decrease_per_step_down;// percentage effect on space of decreasing heap count
+
         int             new_n_heaps;
 #ifdef STRESS_DYNAMIC_HEAP_COUNT
         int             lowest_heap_with_msl_uoh;
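
A toy illustration of how the six new fields could drive the up/down decision (the thresholds and logic here are invented for illustration; the actual policy lives in check_heap_count):

    // Invented example, not the runtime's policy: step up when the projected
    // overhead reduction outweighs the projected space cost, and vice versa.
    struct decision_metrics
    {
        float overhead_reduction_per_step_up;
        float space_cost_increase_per_step_up;
        float overhead_increase_per_step_down;
        float space_cost_decrease_per_step_down;
    };

    int step_direction (const decision_metrics& m)
    {
        if (m.overhead_reduction_per_step_up > m.space_cost_increase_per_step_up)
            return +1;                       // growing the heap count pays for itself
        if (m.space_cost_decrease_per_step_down > m.overhead_increase_per_step_down)
            return -1;                       // shrinking saves more space than it costs
        return 0;                            // stay at the current heap count
    }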