From: Peter Sollich
Date: Mon, 12 Jun 2023 09:38:12 +0000 (+0200)
Subject: Fixes and improvements: (#87218)
X-Git-Tag: accepted/tizen/unified/riscv/20231226.055536~1712
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=89f3a9ef41383bb409b69d1a0f0db910f3ed9a34;p=platform%2Fupstream%2Fdotnet%2Fruntime.git

Fixes and improvements: (#87218)

- Fix a hang caused by a race condition in change_heap_count.
- Change the way we store the dynamic heap count input metrics to make it easier to surface them via ETW events.
- Refactor enter_spin_lock_msl into an inlineable fast path and a slower, more complex out-of-line helper.
- Subtract time spent in safe_switch_to_thread and WaitLongerNoInstru from the msl wait time - this makes the metric much less noisy.
- Add more diagnostic output to check_heap_count and change_heap_count.
- Add more spinning to EnterFinalizeLock to address slow suspensions in some ASP.NET benchmarks.

(Standalone sketches illustrating the locking and overhead patterns follow the diff.)
---

diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp
index 92f6653..28bae97 100644
--- a/src/coreclr/gc/gc.cpp
+++ b/src/coreclr/gc/gc.cpp
@@ -1413,11 +1413,9 @@ bool gc_heap::should_move_heap (GCSpinLock* msl)
 
 // All the places where we could be stopped because there was a suspension should call should_move_heap to check if we need to return
 // so we can try another heap or we can continue the allocation on the same heap.
-enter_msl_status gc_heap::enter_spin_lock_msl (GCSpinLock* msl)
+enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl)
 {
-retry:
-
-    if (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free)
+    do
     {
 #ifdef DYNAMIC_HEAP_COUNT
         uint64_t start = GetHighPrecisionTimeStamp();
@@ -1449,7 +1447,13 @@ retry:
                 }
                 if (VolatileLoad (&msl->lock) != lock_free && !IsGCInProgress ())
                 {
+#ifdef DYNAMIC_HEAP_COUNT
+                    start -= GetHighPrecisionTimeStamp();
+#endif //DYNAMIC_HEAP_COUNT
                     safe_switch_to_thread ();
+#ifdef DYNAMIC_HEAP_COUNT
+                    start += GetHighPrecisionTimeStamp();
+#endif //DYNAMIC_HEAP_COUNT
                 }
             }
             else
@@ -1459,19 +1463,35 @@ retry:
             }
             else
             {
+#ifdef DYNAMIC_HEAP_COUNT
+                start -= GetHighPrecisionTimeStamp();
+#endif //DYNAMIC_HEAP_COUNT
                 WaitLongerNoInstru (i);
+#ifdef DYNAMIC_HEAP_COUNT
+                start += GetHighPrecisionTimeStamp();
+#endif //DYNAMIC_HEAP_COUNT
             }
         }
 #ifdef DYNAMIC_HEAP_COUNT
         uint64_t end = GetHighPrecisionTimeStamp();
         Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start);
+        dprintf (6666, ("wait for msl lock total time: %zd, total count: %zd, this time: %zd, this count: %u", msl->msl_wait_time, msl->msl_wait_count, end - start, i));
 #endif //DYNAMIC_HEAP_COUNT
-
-    goto retry;
     }
+    while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free);
 
     return msl_entered;
 }
 
+inline
+enter_msl_status gc_heap::enter_spin_lock_msl (GCSpinLock* msl)
+{
+    if (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) == lock_free)
+        return msl_entered;
+
+    return enter_spin_lock_msl_helper (msl);
+}
+
 //
 // We need the following methods to have volatile arguments, so that they can accept
 // raw pointers in addition to the results of the & operator on Volatile.
@@ -24905,7 +24925,8 @@ void gc_heap::check_heap_count ()
     if (heap_number == 0)
     {
         // acquire data for the current sample
-        uint64_t msl_wait_time = 0;
+        uint64_t soh_msl_wait_time = 0;
+        uint64_t uoh_msl_wait_time = 0;
         size_t allocating_thread_count = 0;
         size_t heap_size = 0;
         for (int i = 0; i < n_heaps; i++)
@@ -24914,11 +24935,13 @@ void gc_heap::check_heap_count ()
 
             allocating_thread_count += hp->alloc_contexts_used;
 
-            msl_wait_time += hp->more_space_lock_soh.msl_wait_time;
+            soh_msl_wait_time += hp->more_space_lock_soh.msl_wait_time;
             hp->more_space_lock_soh.msl_wait_time = 0;
+            hp->more_space_lock_soh.msl_wait_count = 0;
 
-            msl_wait_time += hp->more_space_lock_uoh.msl_wait_time;
+            uoh_msl_wait_time += hp->more_space_lock_uoh.msl_wait_time;
             hp->more_space_lock_uoh.msl_wait_time = 0;
+            hp->more_space_lock_uoh.msl_wait_count = 0;
 
             for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++)
             {
@@ -24935,15 +24958,17 @@ void gc_heap::check_heap_count ()
 
         // persist data for the current sample
         dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index];
-        sample.msl_wait_time = msl_wait_time;
+        sample.soh_msl_wait_time = soh_msl_wait_time / n_heaps;
+        sample.uoh_msl_wait_time = uoh_msl_wait_time / n_heaps;
         sample.elapsed_between_gcs = dd_time_clock (hp0_dd0) - dd_previous_time_clock (hp0_dd0);
         sample.gc_elapsed_time = dd_gc_elapsed_time (hp0_dd0);
         sample.allocating_thread_count = allocating_thread_count;
         sample.heap_size = heap_size;
 
-        dprintf (6666, ("sample %d: msl_wait_time: %zd, elapsed_between_gcs: %zd, gc_elapsed_time: %d, heap_size: %zd MB",
+        dprintf (6666, ("sample %d: soh_msl_wait_time: %zd, uoh_msl_wait_time: %zd, elapsed_between_gcs: %zd, gc_elapsed_time: %d, heap_size: %zd MB",
             dynamic_heap_count_data.sample_index,
-            sample.msl_wait_time,
+            sample.soh_msl_wait_time,
+            sample.uoh_msl_wait_time,
             sample.elapsed_between_gcs,
             sample.gc_elapsed_time,
             sample.heap_size/(1024*1024)));
@@ -24969,7 +24994,7 @@ void gc_heap::check_heap_count ()
         for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++)
         {
             dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i];
-            uint64_t overhead_time = (sample.msl_wait_time / n_heaps) + sample.gc_elapsed_time;
+            uint64_t overhead_time = sample.soh_msl_wait_time + sample.uoh_msl_wait_time + sample.gc_elapsed_time;
             percent_overhead[i] = overhead_time * 100.0f / sample.elapsed_between_gcs;
             if (percent_overhead[i] < 0)
                 percent_overhead[i] = 0;
@@ -25075,6 +25100,14 @@ void gc_heap::check_heap_count ()
 
     dynamic_heap_count_data.new_n_heaps = new_n_heaps;
 
+    // store data used for decision to emit in ETW event
+    dynamic_heap_count_data.median_percent_overhead = median_percent_overhead;
+    dynamic_heap_count_data.percent_heap_space_cost_per_heap = percent_heap_space_cost_per_heap;
+    dynamic_heap_count_data.overhead_reduction_per_step_up = overhead_reduction_per_step_up;
+    dynamic_heap_count_data.overhead_increase_per_step_down = overhead_increase_per_step_down;
+    dynamic_heap_count_data.space_cost_increase_per_step_up = space_cost_increase_per_step_up;
+    dynamic_heap_count_data.space_cost_decrease_per_step_down = space_cost_decrease_per_step_down;
+
     if (new_n_heaps != n_heaps)
     {
         // can't have threads allocating while we change the number of heaps
@@ -25091,6 +25124,7 @@ void gc_heap::check_heap_count ()
     else if (settings.gc_index < prev_change_heap_count_gc_index + 3)
    {
         // reconsider the decision every few gcs
+        dprintf (6666, ("checked heap count at %zd, now at %zd",
+            prev_change_heap_count_gc_index, settings.gc_index));
         return;
     }
@@ -25107,6 +25141,7 @@ void gc_heap::check_heap_count ()
     if (dynamic_heap_count_data.new_n_heaps == n_heaps)
     {
         // heap count stays the same, no work to do
+        dprintf (6666, ("heap count stays the same, no work to do %d == %d", dynamic_heap_count_data.new_n_heaps, n_heaps));
         return;
     }
@@ -25266,8 +25301,12 @@ bool gc_heap::change_heap_count (int new_n_heaps)
         }
     }
 
-    if (dynamic_heap_count_data.new_n_heaps == n_heaps)
+    // gc_heap::n_heaps may have changed by now, compare to the snapshot *before* the join
+    if (dynamic_heap_count_data.new_n_heaps == old_n_heaps)
+    {
+        dprintf (6666, ("failed to change heap count, no work to do %d == %d", dynamic_heap_count_data.new_n_heaps, old_n_heaps));
         return false;
+    }
 
     if (heap_number == 0)
     {
@@ -50749,7 +50788,18 @@ retry:
         unsigned int i = 0;
         while (lock >= 0)
         {
-            YieldProcessor(); // indicate to the processor that we are spinning
+            if (g_num_processors > 1)
+            {
+                int spin_count = 128 * yp_spin_count_unit;
+                for (int j = 0; j < spin_count; j++)
+                {
+                    if (lock < 0)
+                        break;
+                    YieldProcessor (); // indicate to the processor that we are spinning
+                }
+            }
+            if (lock < 0)
+                break;
             if (++i & 7)
                 GCToOSInterface::YieldThread (0);
             else
diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h
index 1263610..d970fa9 100644
--- a/src/coreclr/gc/gcpriv.h
+++ b/src/coreclr/gc/gcpriv.h
@@ -2215,6 +2215,8 @@ private:
 
     PER_HEAP_METHOD bool should_move_heap (GCSpinLock* msl);
 
+    PER_HEAP_METHOD enter_msl_status enter_spin_lock_msl_helper (GCSpinLock* msl);
+
     PER_HEAP_METHOD enter_msl_status enter_spin_lock_msl (GCSpinLock* msl);
 
     PER_HEAP_METHOD size_t get_full_compact_gc_count();
@@ -4244,7 +4246,8 @@ private:
     {
         uint64_t elapsed_between_gcs;    // time between gcs in microseconds
         uint64_t gc_elapsed_time;        // time the gc took
-        uint64_t msl_wait_time;          // time the allocator spent waiting for the msl lock
+        uint64_t soh_msl_wait_time;      // time the allocator spent waiting for the soh msl lock
+        uint64_t uoh_msl_wait_time;      // time the allocator spent waiting for the uoh msl lock
         size_t   allocating_thread_count;// number of allocating threads
         size_t   heap_size;
     };
@@ -4252,6 +4255,13 @@
         unsigned sample_index;
         sample samples[sample_size];
 
+        float median_percent_overhead;          // estimated overhead of allocator + gc
+        float percent_heap_space_cost_per_heap; // percent space cost of adding a heap
+        float overhead_reduction_per_step_up;   // percentage effect on overhead of increasing heap count
+        float overhead_increase_per_step_down;  // percentage effect on overhead of decreasing heap count
+        float space_cost_increase_per_step_up;  // percentage effect on space of increasing heap count
+        float space_cost_decrease_per_step_down;// percentage effect on space of decreasing heap count
+
         int new_n_heaps;
 #ifdef STRESS_DYNAMIC_HEAP_COUNT
         int lowest_heap_with_msl_uoh
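
The enter_spin_lock_msl refactoring and the wait-time accounting above boil down to two patterns: a single-CAS fast path that is small enough to inline, with the contended loop kept out of line, and a start timestamp that is shifted across yield calls so their duration drops out of the metric. Below is a standalone sketch of both using C++11 atomics - SpinLock, enter, enter_slow_path, leave, and now_us are illustrative stand-ins, not the runtime's GCSpinLock/Interlocked API:

    // msl_sketch.cpp - illustrative only.
    #include <atomic>
    #include <chrono>
    #include <cstdint>
    #include <thread>

    static uint64_t now_us ()
    {
        using namespace std::chrono;
        return duration_cast<microseconds> (steady_clock::now ().time_since_epoch ()).count ();
    }

    struct SpinLock
    {
        std::atomic<int> lock{0};           // 0 == lock_free, 1 == lock_taken
        std::atomic<uint64_t> wait_time{0}; // accumulated wait, in microseconds
    };

    // Out-of-line slow path: back off, account the wait, retry the CAS.
    void enter_slow_path (SpinLock* msl)
    {
        int expected;
        do
        {
            uint64_t start = now_us ();
            while (msl->lock.load (std::memory_order_relaxed) != 0)
            {
                // Shift the start timestamp so time spent inside yield()
                // is excluded from the wait metric - the same trick the
                // patch applies around safe_switch_to_thread and
                // WaitLongerNoInstru.
                start -= now_us ();
                std::this_thread::yield ();
                start += now_us ();
            }
            msl->wait_time.fetch_add (now_us () - start);
            expected = 0;
        }
        while (!msl->lock.compare_exchange_strong (expected, 1));
    }

    // Inlineable fast path: a single CAS; fall out of line only on contention.
    inline void enter (SpinLock* msl)
    {
        int expected = 0;
        if (!msl->lock.compare_exchange_strong (expected, 1))
            enter_slow_path (msl);
    }

    inline void leave (SpinLock* msl)
    {
        msl->lock.store (0, std::memory_order_release);
    }

The release store in leave() pairs with the acquire semantics of the CAS in enter(); the runtime's GCSpinLock carries extra bookkeeping (msl_wait_count, should_move_heap checks) that this sketch omits.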
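The decision logic in check_heap_count now charges the allocator wait per heap when the sample is recorded rather than dividing the sample total at evaluation time. A minimal sketch of the per-sample overhead computation as the diff leaves it - sample_t mirrors just the fields involved, and the wait fields are assumed to already hold per-heap averages, as stored above:

    // overhead_sketch.cpp - illustrative; sample_t mirrors a subset of
    // dynamic_heap_count_data_t::sample.
    #include <cstdint>

    struct sample_t
    {
        uint64_t elapsed_between_gcs; // time between gcs in microseconds
        uint64_t gc_elapsed_time;     // time the gc took
        uint64_t soh_msl_wait_time;   // stored pre-divided by n_heaps (see diff)
        uint64_t uoh_msl_wait_time;   // stored pre-divided by n_heaps (see diff)
    };

    // Percent of the wall-clock time between two gcs lost to allocator
    // waits + GC. Before the patch the soh and uoh waits were one sum,
    // divided by n_heaps at this point; now the division happens once,
    // when the sample is recorded.
    float percent_overhead (const sample_t& sample)
    {
        uint64_t overhead_time = sample.soh_msl_wait_time
                               + sample.uoh_msl_wait_time
                               + sample.gc_elapsed_time;
        float percent = overhead_time * 100.0f / sample.elapsed_between_gcs;
        return (percent < 0) ? 0.0f : percent; // clamp mirrors the diff
    }

Splitting soh from uoh also lets the ETW event report the two waits separately, which the single summed field could not.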
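The EnterFinalizeLock change applies a standard two-phase wait: burn a bounded budget of processor-level pause hints before paying for a thread yield, and skip spinning entirely on a single processor, where it cannot help. A generic sketch with std::atomic follows - wait_until_free, cpu_pause, and the 2048 budget are illustrative (the runtime uses 128 * yp_spin_count_unit and g_num_processors), and since the diff cuts off the else branch, the sleep fallback is an assumption:

    // finalize_lock_spin_sketch.cpp - illustrative two-phase wait, not the
    // runtime's EnterFinalizeLock.
    #include <atomic>
    #include <chrono>
    #include <thread>
    #if defined(__x86_64__) || defined(__i386__)
    #include <immintrin.h> // _mm_pause
    #endif

    // CPU-level spin hint; the closest analogue of YieldProcessor.
    static inline void cpu_pause ()
    {
    #if defined(__x86_64__) || defined(__i386__)
        _mm_pause ();
    #endif // otherwise: no pause hint available, plain re-check
    }

    static const int k_spin_budget = 2048; // illustrative; cf. 128 * yp_spin_count_unit

    void wait_until_free (const std::atomic<bool>& locked)
    {
        unsigned int i = 0;
        while (locked.load (std::memory_order_relaxed))
        {
            // Phase 1: cheap on-core spinning, only useful with >1 processor.
            if (std::thread::hardware_concurrency () > 1)
            {
                for (int j = 0; j < k_spin_budget; j++)
                {
                    if (!locked.load (std::memory_order_relaxed))
                        return;
                    cpu_pause ();
                }
            }
            if (!locked.load (std::memory_order_relaxed))
                return;
            // Phase 2: contended for real - yield the thread most iterations
            // (cf. '++i & 7' in the diff); the sleep fallback is assumed,
            // since the diff's else branch is truncated.
            if (++i & 7)
                std::this_thread::yield ();
            else
                std::this_thread::sleep_for (std::chrono::milliseconds (1));
        }
    }

The point of phase 1 is that a suspension that completes within the spin budget never pays for a context switch, which is what addressed the slow suspensions seen in the ASP.NET benchmarks.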