From: Maoni Stephens
Date: Wed, 26 Dec 2018 21:13:21 +0000 (-0800)
Subject: desktop port (dotnet/coreclr#21523)
X-Git-Tag: submit/tizen/20210909.063632~11030^2~2995
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8eee7ea7a81ffd76f826403d7959fd03028eadac;p=platform%2Fupstream%2Fdotnet%2Fruntime.git

desktop port (dotnet/coreclr#21523)

+alloc lock split into SOH and LOH
+provisional mode to fix too many gen2 GCs triggered in low mem situation when the heap has heavy pinning fragmentation
+better free list usage
+premature OOM fixes
+3 new configs: GCHeapAffinitizeMask, GCHighMemPercent, GCLOHThreshold (will be documented)

YieldProcessor scaling factor is different on core due to the different implementation on core.

Commit migrated from https://github.com/dotnet/coreclr/commit/aa13ca95d633e9251fa040533d3d5650808455c0
---

diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake
index 11d2a43..c0c4c7e 100644
--- a/src/coreclr/clrdefinitions.cmake
+++ b/src/coreclr/clrdefinitions.cmake
@@ -90,10 +90,6 @@ if(WIN32)
 endif(WIN32)
 
 # Features - please keep them alphabetically sorted
-if (FEATURE_APPDOMAIN_RESOURCE_MONITORING)
-  add_definitions(-DFEATURE_APPDOMAIN_RESOURCE_MONITORING)
-endif(FEATURE_APPDOMAIN_RESOURCE_MONITORING)
-
 if(WIN32)
   add_definitions(-DFEATURE_APPX)
   if(NOT CLR_CMAKE_TARGET_ARCH_I386)
diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake
index 88b9cdd..bb4f09f 100644
--- a/src/coreclr/clrfeatures.cmake
+++ b/src/coreclr/clrfeatures.cmake
@@ -20,10 +20,6 @@ if(NOT DEFINED FEATURE_INTERPRETER)
   set(FEATURE_INTERPRETER 0)
 endif(NOT DEFINED FEATURE_INTERPRETER)
 
-if(NOT DEFINED FEATURE_APPDOMAIN_RESOURCE_MONITORING)
-  set(FEATURE_APPDOMAIN_RESOURCE_MONITORING 1)
-endif(NOT DEFINED FEATURE_APPDOMAIN_RESOURCE_MONITORING)
-
 if(NOT DEFINED FEATURE_STANDALONE_GC)
   set(FEATURE_STANDALONE_GC 1)
 endif(NOT DEFINED FEATURE_STANDALONE_GC)
diff --git a/src/coreclr/src/gc/env/gcenv.ee.h b/src/coreclr/src/gc/env/gcenv.ee.h
index bd58169..ec72517 100644
--- a/src/coreclr/src/gc/env/gcenv.ee.h
+++ b/src/coreclr/src/gc/env/gcenv.ee.h
@@ -94,6 +94,8 @@ public:
 
     static bool AnalyzeSurvivorsRequested(int condemnedGeneration);
     static void AnalyzeSurvivorsFinished(int condemnedGeneration);
+
+    static void VerifySyncTableEntry();
 };
 
 #endif // __GCENV_EE_H__
diff --git a/src/coreclr/src/gc/gc.cpp b/src/coreclr/src/gc/gc.cpp
index 095eb5c..95576fe 100644
--- a/src/coreclr/src/gc/gc.cpp
+++ b/src/coreclr/src/gc/gc.cpp
@@ -18,10 +18,31 @@
 //
 
 #include "gcpriv.h"
-#include "softwarewritewatch.h"
 
 #define USE_INTROSORT
 
+// We just needed a simple random number generator for testing.
+class gc_rand
+{
+public:
+    static uint64_t x;
+
+    static uint64_t get_rand()
+    {
+        x = (314159269*x+278281) & 0x7FFFFFFF;
+        return x;
+    }
+
+    // obtain random number in the range 0 ..
r-1 + static uint64_t get_rand(uint64_t r) { + // require r >= 0 + uint64_t x = (uint64_t)((get_rand() * r) >> 31); + return x; + } +}; + +uint64_t gc_rand::x = 0; + #if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE) BOOL bgc_heap_walk_for_etw_p = FALSE; #endif //BACKGROUND_GC && FEATURE_EVENT_TRACE @@ -61,6 +82,9 @@ BOOL bgc_heap_walk_for_etw_p = FALSE; #define MAX_SUPPORTED_CPUS 64 #endif // BIT64 +uint32_t yp_spin_count_unit = 0; +size_t loh_size_threshold = LARGE_OBJECT_SIZE; + #ifdef GC_CONFIG_DRIVEN int compact_ratio = 0; #endif //GC_CONFIG_DRIVEN @@ -109,8 +133,26 @@ const char * const allocation_state_str[] = { "trigger_2nd_ephemeral_gc", "check_retry_seg" }; + +const char * const msl_take_state_str[] = { + "get_large_seg", + "bgc_loh_sweep", + "wait_bgc", + "block_gc", + "clr_mem", + "clr_large_mem", + "t_eph_gc", + "t_full_gc", + "alloc_small", + "alloc_large", + "alloc_small_cant", + "alloc_large_cant", + "try_alloc", + "try_budget" +}; #endif //TRACE_GC && !DACCESS_COMPILE + // Keep this in sync with the definition of gc_reason #if (defined(DT_LOG) || defined(TRACE_GC)) && !defined (DACCESS_COMPILE) static const char* const str_gc_reasons[] = @@ -125,7 +167,10 @@ static const char* const str_gc_reasons[] = "induced_noforce", "gcstress", "induced_lowmem", - "induced_compacting" + "induced_compacting", + "lowmemory_host", + "pm_full_gc", + "lowmemory_host_blocking" }; static const char* const str_gc_pause_modes[] = @@ -145,7 +190,9 @@ BOOL is_induced (gc_reason reason) (reason == reason_induced_noforce) || (reason == reason_lowmemory) || (reason == reason_lowmemory_blocking) || - (reason == reason_induced_compacting)); + (reason == reason_induced_compacting) || + (reason == reason_lowmemory_host) || + (reason == reason_lowmemory_host_blocking)); } inline @@ -153,7 +200,8 @@ BOOL is_induced_blocking (gc_reason reason) { return ((reason == reason_induced) || (reason == reason_lowmemory_blocking) || - (reason == reason_induced_compacting)); + (reason == reason_induced_compacting) || + (reason == reason_lowmemory_host_blocking)); } #ifndef DACCESS_COMPILE @@ -167,7 +215,6 @@ size_t GetHighPrecisionTimeStamp() } #endif - #ifdef GC_STATS // There is a current and a prior copy of the statistics. This allows us to display deltas per reporting // interval, as well as running totals. The 'min' and 'max' values require special treatment. 
They are @@ -208,10 +255,8 @@ void GCStatistics::AddGCStats(const gc_mechanisms& settings, size_t timeInMSec) if (is_induced (settings.reason)) cntReasons[(int)reason_induced]++; -#ifdef STRESS_HEAP else if (settings.stress_induced) cntReasons[(int)reason_gcstress]++; -#endif // STRESS_HEAP else cntReasons[(int)settings.reason]++; @@ -486,7 +531,7 @@ void log_va_msg(const char *fmt, va_list args) int pid_len = sprintf_s (&pBuffer[buffer_start], BUFFERSIZE - buffer_start, "[%5d]", (uint32_t)GCToOSInterface::GetCurrentThreadIdForLogging()); buffer_start += pid_len; memset(&pBuffer[buffer_start], '-', BUFFERSIZE - buffer_start); - int msg_len = _vsnprintf_s(&pBuffer[buffer_start], BUFFERSIZE - buffer_start, _TRUNCATE, fmt, args ); + int msg_len = _vsnprintf_s (&pBuffer[buffer_start], BUFFERSIZE - buffer_start, _TRUNCATE, fmt, args); if (msg_len == -1) { msg_len = BUFFERSIZE - buffer_start; @@ -694,8 +739,10 @@ enum gc_join_flavor join_flavor_server_gc = 0, join_flavor_bgc = 1 }; - + #define first_thread_arrived 2 +#pragma warning(push) +#pragma warning(disable:4324) // don't complain if DECLSPEC_ALIGN actually pads struct DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) join_structure { // Shared non volatile keep on separate line to prevent eviction @@ -714,6 +761,7 @@ struct DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) join_structure VOLATILE(int32_t) r_join_lock; }; +#pragma warning(pop) enum join_type { @@ -831,7 +879,7 @@ public: if (color == join_struct.lock_color.LoadWithoutBarrier()) { respin: - int spin_count = 4096 * (gc_heap::n_heaps - 1); + int spin_count = 128 * yp_spin_count_unit; for (int j = 0; j < spin_count; j++) { if (color != join_struct.lock_color.LoadWithoutBarrier()) @@ -848,9 +896,9 @@ respin: flavor, join_id, color, (int32_t)(join_struct.join_lock))); //Thread* current_thread = GCToEEInterface::GetThread(); - //BOOL cooperative_mode = gc_heap::enable_preemptive (current_thread); + //BOOL cooperative_mode = gc_heap::enable_preemptive (); uint32_t dwJoinWait = join_struct.joined_event[color].Wait(INFINITE, FALSE); - //gc_heap::disable_preemptive (current_thread, cooperative_mode); + //gc_heap::disable_preemptive (cooperative_mode); if (dwJoinWait != WAIT_OBJECT_0) { @@ -920,7 +968,7 @@ respin: if (!join_struct.wait_done) { respin: - int spin_count = 2 * 4096 * (gc_heap::n_heaps - 1); + int spin_count = 256 * yp_spin_count_unit; for (int j = 0; j < spin_count; j++) { if (join_struct.wait_done) @@ -1650,9 +1698,9 @@ retry: if (g_num_processors > 1) { #ifndef MULTIPLE_HEAPS - int spin_count = 1024 * g_num_processors; + int spin_count = 32 * yp_spin_count_unit; #else //!MULTIPLE_HEAPS - int spin_count = 32 * g_num_processors; + int spin_count = yp_spin_count_unit; #endif //!MULTIPLE_HEAPS for (int j = 0; j < spin_count; j++) { @@ -1796,9 +1844,9 @@ retry: if (g_num_processors > 1) { #ifndef MULTIPLE_HEAPS - int spin_count = 1024 * g_num_processors; + int spin_count = 32 * yp_spin_count_unit; #else //!MULTIPLE_HEAPS - int spin_count = 32 * g_num_processors; + int spin_count = yp_spin_count_unit; #endif //!MULTIPLE_HEAPS for (int j = 0; j < spin_count; j++) { @@ -2114,7 +2162,7 @@ uint8_t* gc_heap::pad_for_alignment_large (uint8_t* newAlloc, int requiredAlignm #define CLR_SIZE ((size_t)(8*1024)) #endif //SERVER_GC -#define END_SPACE_AFTER_GC (LARGE_OBJECT_SIZE + MAX_STRUCTALIGN) +#define END_SPACE_AFTER_GC (loh_size_threshold + MAX_STRUCTALIGN) #ifdef BACKGROUND_GC #define SEGMENT_INITIAL_COMMIT (2*OS_PAGE_SIZE) @@ -2397,8 +2445,8 @@ VOLATILE(BOOL) gc_heap::gc_started; #ifdef MULTIPLE_HEAPS GCEvent 
gc_heap::gc_start_event; - bool gc_heap::gc_thread_no_affinitize_p = false; +uintptr_t process_mask = 0; int gc_heap::n_heaps; @@ -2468,6 +2516,10 @@ uint64_t gc_heap::mem_one_percent = 0; uint32_t gc_heap::high_memory_load_th = 0; +uint32_t gc_heap::m_high_memory_load_th; + +uint32_t gc_heap::v_high_memory_load_th; + uint64_t gc_heap::total_physical_mem = 0; uint64_t gc_heap::entry_available_physical_mem = 0; @@ -2521,6 +2573,8 @@ BOOL gc_heap::elevation_requested = FALSE; BOOL gc_heap::last_gc_before_oom = FALSE; +BOOL gc_heap::sufficient_gen0_space_p = FALSE; + #ifdef BACKGROUND_GC uint8_t* gc_heap::background_saved_lowest_address = 0; uint8_t* gc_heap::background_saved_highest_address = 0; @@ -2596,7 +2650,9 @@ size_t gc_heap::mark_stack_array_length = 0; mark* gc_heap::mark_stack_array = 0; +#if defined (_DEBUG) && defined (VERIFY_HEAP) BOOL gc_heap::verify_pinned_queue_p = FALSE; +#endif // defined (_DEBUG) && defined (VERIFY_HEAP) uint8_t* gc_heap::oldest_pinned_plug = 0; @@ -2795,8 +2851,18 @@ VOLATILE(bool) gc_heap::full_gc_approach_event_set; size_t gc_heap::full_gc_counts[gc_type_max]; +bool gc_heap::maxgen_size_inc_p = false; + BOOL gc_heap::should_expand_in_full_gc = FALSE; +// Provisional mode related stuff. +bool gc_heap::provisional_mode_triggered = false; +bool gc_heap::pm_trigger_full_gc = false; +size_t gc_heap::provisional_triggered_gc_count = 0; +size_t gc_heap::provisional_off_gc_count = 0; +size_t gc_heap::num_provisional_triggered = 0; +bool gc_heap::pm_stress_on = false; + #ifdef HEAP_ANALYZE BOOL gc_heap::heap_analyze_enabled = FALSE; #endif //HEAP_ANALYZE @@ -2814,7 +2880,9 @@ uint8_t* gc_heap::alloc_allocated = 0; size_t gc_heap::allocation_quantum = CLR_SIZE; -GCSpinLock gc_heap::more_space_lock; +GCSpinLock gc_heap::more_space_lock_soh; +GCSpinLock gc_heap::more_space_lock_loh; +VOLATILE(int32_t) gc_heap::loh_alloc_thread_count = 0; #ifdef SYNCHRONIZATION_STATS unsigned int gc_heap::good_suspension = 0; @@ -3011,18 +3079,18 @@ void gc_heap::fire_per_heap_hist_event (gc_history_per_heap* current_gc_data_per void gc_heap::fire_pevents() { -#ifndef CORECLR settings.record (&gc_data_global); gc_data_global.print(); - FIRE_EVENT(GCGlobalHeapHistory_V2, gc_data_global.final_youngest_desired, - gc_data_global.num_heaps, - gc_data_global.condemned_generation, - gc_data_global.gen0_reduction_count, - gc_data_global.reason, - gc_data_global.global_mechanims_p, - gc_data_global.pause_mode, - gc_data_global.mem_pressure); + FIRE_EVENT(GCGlobalHeapHistory_V2, + gc_data_global.final_youngest_desired, + gc_data_global.num_heaps, + gc_data_global.condemned_generation, + gc_data_global.gen0_reduction_count, + gc_data_global.reason, + gc_data_global.global_mechanims_p, + gc_data_global.pause_mode, + gc_data_global.mem_pressure); #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) @@ -3035,7 +3103,6 @@ void gc_heap::fire_pevents() gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); fire_per_heap_hist_event (current_gc_data_per_heap, heap_number); #endif -#endif //!CORECLR } inline BOOL @@ -3060,12 +3127,10 @@ gc_heap::dt_low_ephemeral_space_p (gc_tuning_point tp) dprintf (GTC_LOG, ("h%d: plan eph size is %Id, new gen0 is %Id", heap_number, plan_ephemeral_size, new_gen0size)); - // If we were in no_gc_region we could have allocated a larger than normal segment, // and the next seg we allocate will be a normal sized seg so if we can't fit the new // ephemeral generations there, do an ephemeral promotion. 
ret = ((soh_segment_size - segment_info_size) < (plan_ephemeral_size + new_gen0size)); - break; } default: @@ -3937,11 +4002,7 @@ public: Validate(bDeep, FALSE); } - ADIndex GetAppDomainIndex() - { - return (ADIndex)RH_DEFAULT_DOMAIN_ID; - } -#endif //FEATURE_REDHAWK +#endif //FEATURE_REDHAWK || BUILD_AS_STANDALONE ///// // @@ -4440,7 +4501,7 @@ void* virtual_alloc (size_t size) // We don't want (prgmem + size) to be right at the end of the address space // because we'd have to worry about that everytime we do (address + size). - // We also want to make sure that we leave LARGE_OBJECT_SIZE at the end + // We also want to make sure that we leave loh_size_threshold at the end // so we allocate a small object we don't need to worry about overflow there // when we do alloc_ptr+size. if (prgmem) @@ -4882,10 +4943,8 @@ gc_heap::get_large_segment (size_t size, BOOL* did_full_compact_gc) size_t last_full_compact_gc_count = get_full_compact_gc_count(); //access to get_segment needs to be serialized - add_saved_spinlock_info (me_release, mt_get_large_seg); - - dprintf (SPINLOCK_LOG, ("[%d]Seg: Lmsl", heap_number)); - leave_spin_lock (&more_space_lock); + add_saved_spinlock_info (true, me_release, mt_get_large_seg); + leave_spin_lock (&more_space_lock_loh); enter_spin_lock (&gc_heap::gc_lock); dprintf (SPINLOCK_LOG, ("[%d]Seg: Egc", heap_number)); // if a GC happened between here and before we ask for a segment in @@ -4897,21 +4956,6 @@ gc_heap::get_large_segment (size_t size, BOOL* did_full_compact_gc) *did_full_compact_gc = TRUE; } -#ifdef BACKGROUND_GC - while (current_c_gc_state == c_gc_state_planning) - { - dprintf (3, ("lh state planning, waiting to get a large seg")); - - dprintf (SPINLOCK_LOG, ("[%d]Seg: P, Lgc", heap_number)); - leave_spin_lock (&gc_lock); - background_gc_wait_lh (awr_get_loh_seg); - enter_spin_lock (&gc_lock); - dprintf (SPINLOCK_LOG, ("[%d]Seg: P, Egc", heap_number)); - } - assert ((current_c_gc_state == c_gc_state_free) || - (current_c_gc_state == c_gc_state_marking)); -#endif //BACKGROUND_GC - heap_segment* res = get_segment_for_loh (size #ifdef MULTIPLE_HEAPS , this @@ -4920,13 +4964,8 @@ gc_heap::get_large_segment (size_t size, BOOL* did_full_compact_gc) dprintf (SPINLOCK_LOG, ("[%d]Seg: A Lgc", heap_number)); leave_spin_lock (&gc_heap::gc_lock); - enter_spin_lock (&more_space_lock); - dprintf (SPINLOCK_LOG, ("[%d]Seg: A Emsl", heap_number)); - add_saved_spinlock_info (me_acquire, mt_get_large_seg); - -#ifdef BACKGROUND_GC - wait_for_background_planning (awr_get_loh_seg); -#endif //BACKGROUND_GC + enter_spin_lock (&more_space_lock_loh); + add_saved_spinlock_info (true, me_acquire, mt_get_large_seg); return res; } @@ -5291,39 +5330,35 @@ void set_thread_affinity_mask_for_heap(int heap_number, GCThreadAffinity* affini affinity->Group = GCThreadAffinity::None; affinity->Processor = GCThreadAffinity::None; - uintptr_t pmask, smask; - if (GCToOSInterface::GetCurrentProcessAffinityMask(&pmask, &smask)) + uintptr_t pmask = process_mask; + int bit_number = 0; + uint8_t proc_number = 0; + for (uintptr_t mask = 1; mask != 0; mask <<= 1) { - pmask &= smask; - int bit_number = 0; - uint8_t proc_number = 0; - for (uintptr_t mask = 1; mask != 0; mask <<= 1) + if ((mask & pmask) != 0) { - if ((mask & pmask) != 0) + if (bit_number == heap_number) { - if (bit_number == heap_number) + dprintf (3, ("Using processor %d for heap %d", proc_number, heap_number)); + affinity->Processor = proc_number; + heap_select::set_proc_no_for_heap(heap_number, proc_number); + if 
(GCToOSInterface::CanEnableGCNumaAware()) { - dprintf (3, ("Using processor %d for heap %d", proc_number, heap_number)); - affinity->Processor = proc_number; - heap_select::set_proc_no_for_heap(heap_number, proc_number); - if (GCToOSInterface::CanEnableGCNumaAware()) + uint16_t node_no = 0; + PROCESSOR_NUMBER proc_no; + proc_no.Group = 0; + proc_no.Number = (uint8_t)proc_number; + proc_no.Reserved = 0; + if (GCToOSInterface::GetNumaProcessorNode(&proc_no, &node_no)) { - uint16_t node_no = 0; - PROCESSOR_NUMBER proc_no; - proc_no.Group = 0; - proc_no.Number = (uint8_t)proc_number; - proc_no.Reserved = 0; - if (GCToOSInterface::GetNumaProcessorNode(&proc_no, &node_no)) - { - heap_select::set_numa_node_for_heap(heap_number, node_no); - } + heap_select::set_numa_node_for_heap(heap_number, node_no); } - return; } - bit_number++; + return; } - proc_number++; + bit_number++; } + proc_number++; } } @@ -5379,8 +5414,15 @@ void gc_heap::gc_thread_function () assert ((heap_number == 0) || proceed_with_gc_p); if (proceed_with_gc_p) + { garbage_collect (GCHeap::GcCondemnedGeneration); + if (pm_trigger_full_gc) + { + garbage_collect_pm_full_gc(); + } + } + if (heap_number == 0) { if (proceed_with_gc_p && (!settings.concurrent)) @@ -5396,9 +5438,8 @@ void gc_heap::gc_thread_function () for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; - hp->add_saved_spinlock_info (me_release, mt_block_gc); - dprintf (SPINLOCK_LOG, ("[%d]GC Lmsl", i)); - leave_spin_lock(&hp->more_space_lock); + hp->add_saved_spinlock_info (false, me_release, mt_block_gc); + leave_spin_lock(&hp->more_space_lock_soh); } #endif //MULTIPLE_HEAPS @@ -5451,7 +5492,7 @@ bool virtual_alloc_commit_for_heap(void* addr, size_t size, int h_number) #if defined(MULTIPLE_HEAPS) && !defined(FEATURE_REDHAWK) // Currently there is no way for us to specific the numa node to allocate on via hosting interfaces to // a host. This will need to be added later. 
-#if !defined(FEATURE_CORECLR) +#if !defined(FEATURE_CORECLR) && !defined(BUILD_AS_STANDALONE) if (!CLRMemoryHosted()) #endif { @@ -5942,13 +5983,13 @@ struct fix_alloc_context_args void* heap; }; -void fix_alloc_context(gc_alloc_context* acontext, void* param) +void fix_alloc_context (gc_alloc_context* acontext, void* param) { fix_alloc_context_args* args = (fix_alloc_context_args*)param; - g_theGCHeap->FixAllocContext(acontext, false, (void*)(size_t)(args->for_gc_p), args->heap); + g_theGCHeap->FixAllocContext(acontext, (void*)(size_t)(args->for_gc_p), args->heap); } -void gc_heap::fix_allocation_contexts(BOOL for_gc_p) +void gc_heap::fix_allocation_contexts (BOOL for_gc_p) { fix_alloc_context_args args; args.for_gc_p = for_gc_p; @@ -5974,6 +6015,16 @@ void gc_heap::fix_older_allocation_area (generation* older_gen) assert ((size >= Align (min_obj_size))); dprintf(3,("Making unused area [%Ix, %Ix[", (size_t)point, (size_t)point+size)); make_unused_array (point, size); + if (size >= min_free_list) + { + generation_allocator (older_gen)->thread_item_front (point, size); + add_gen_free (older_gen->gen_num, size); + generation_free_list_space (older_gen) += size; + } + else + { + generation_free_obj_space (older_gen) += size; + } } } else @@ -5984,6 +6035,9 @@ void gc_heap::fix_older_allocation_area (generation* older_gen) generation_allocation_limit (older_gen) = generation_allocation_pointer (older_gen); } + + generation_allocation_pointer (older_gen) = 0; + generation_allocation_limit (older_gen) = 0; } void gc_heap::set_allocation_heap_segment (generation* gen) @@ -6742,7 +6796,6 @@ short*& card_table_brick_table (uint32_t* c_table) } #ifdef CARD_BUNDLE -// Get the card bundle table for the specified card table. inline uint32_t*& card_table_card_bundle_table (uint32_t* c_table) { @@ -7441,6 +7494,8 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, saved_g_lowest_address, saved_g_highest_address); + seg_mapping_table = new_seg_mapping_table; + // Since the runtime is already suspended, update the write barrier here as well. // This passes a bool telling whether we need to switch to the post // grow version of the write barrier. This test tells us if the new @@ -7466,14 +7521,13 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, #endif } - seg_mapping_table = new_seg_mapping_table; - - GCToOSInterface::FlushProcessWriteBuffers(); - g_gc_lowest_address = saved_g_lowest_address; - g_gc_highest_address = saved_g_highest_address; - if (!write_barrier_updated) { + seg_mapping_table = new_seg_mapping_table; + GCToOSInterface::FlushProcessWriteBuffers(); + g_gc_lowest_address = saved_g_lowest_address; + g_gc_highest_address = saved_g_highest_address; + // This passes a bool telling whether we need to switch to the post // grow version of the write barrier. 
This test tells us if the new // segment was allocated at a lower address than the old, requiring @@ -7485,7 +7539,6 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, stomp_write_barrier_resize(GCToEEInterface::IsGCThread(), la != saved_g_lowest_address); } - return 0; fail: @@ -7688,8 +7741,11 @@ void gc_heap::copy_brick_card_table() #ifdef CARD_BUNDLE #if defined(MARK_ARRAY) && defined(_DEBUG) + size_t cb_end = (size_t)((uint8_t*)card_table_card_bundle_table (ct) + size_card_bundle_of (g_gc_lowest_address, g_gc_highest_address)); #ifdef GROWABLE_SEG_MAPPING_TABLE size_t st = size_seg_mapping_table_of (g_gc_lowest_address, g_gc_highest_address); + size_t cb_end_aligned = align_for_seg_mapping_table (cb_end); + st += (cb_end_aligned - cb_end); #else //GROWABLE_SEG_MAPPING_TABLE size_t st = 0; #endif //GROWABLE_SEG_MAPPING_TABLE @@ -8776,7 +8832,8 @@ public: size_t plug_size_to_fit = plug_size; - int pad_in_front = (old_loc != 0) ? USE_PADDING_FRONT : 0; + // best fit is only done for gen1 to gen2 and we do not pad in gen2. + int pad_in_front = 0; #ifdef SHORT_PLUGS plug_size_to_fit += (pad_in_front ? Align(min_obj_size) : 0); @@ -10007,6 +10064,12 @@ HRESULT gc_heap::initialize_gc (size_t segment_size, #endif //MULTIPLE_HEAPS +#ifdef MULTIPLE_HEAPS + yp_spin_count_unit = 32 * number_of_heaps; +#else + yp_spin_count_unit = 32 * g_num_processors; +#endif //MULTIPLE_HEAPS + if (!init_semi_shared()) { hres = E_FAIL; @@ -10092,6 +10155,9 @@ gc_heap::init_semi_shared() loh_compaction_mode = loh_compaction_default; #endif //FEATURE_LOH_COMPACTION + loh_size_threshold = (size_t)GCConfig::GetLOHThreshold(); + assert (loh_size_threshold >= LARGE_OBJECT_SIZE); + #ifdef BACKGROUND_GC memset (ephemeral_fgc_counts, 0, sizeof (ephemeral_fgc_counts)); bgc_alloc_spin_count = static_cast(GCConfig::GetBGCSpinCount()); @@ -10261,7 +10327,7 @@ retry: { if (g_num_processors > 1) { - int spin_count = 32 * g_num_processors; + int spin_count = yp_spin_count_unit; for (int j = 0; j < spin_count; j++) { if (gc_done_event_lock < 0) @@ -10298,6 +10364,7 @@ GCEvent gc_heap::gc_done_event; VOLATILE(bool) gc_heap::internal_gc_done; void gc_heap::add_saved_spinlock_info ( + bool loh_p, msl_enter_state enter_state, msl_take_state take_state) @@ -10308,6 +10375,12 @@ void gc_heap::add_saved_spinlock_info ( current->enter_state = enter_state; current->take_state = take_state; current->thread_id.SetToCurrentThread(); + current->loh_p = loh_p; + dprintf (SPINLOCK_LOG, ("[%d]%s %s %s", + heap_number, + (loh_p ? "loh" : "soh"), + ((enter_state == me_acquire) ? 
"E" : "L"), + msl_take_state_str[take_state])); spinlock_info_index++; @@ -10358,7 +10431,9 @@ gc_heap::init_gc_heap (int h_number) mark_stack_array = 0; +#if defined (_DEBUG) && defined (VERIFY_HEAP) verify_pinned_queue_p = FALSE; +#endif // _DEBUG && VERIFY_HEAP loh_pinned_queue_tos = 0; @@ -10380,7 +10455,9 @@ gc_heap::init_gc_heap (int h_number) allocation_quantum = CLR_SIZE; - more_space_lock = gc_lock; + more_space_lock_soh = gc_lock; + + more_space_lock_loh = gc_lock; ro_segments_in_range = FALSE; @@ -10388,6 +10465,8 @@ gc_heap::init_gc_heap (int h_number) new_heap_segment = NULL; + gen0_allocated_after_gc_p = false; + #ifdef RECORD_LOH_STATE loh_state_index = 0; #endif //RECORD_LOH_STATE @@ -10628,6 +10707,8 @@ gc_heap::init_gc_heap (int h_number) last_gc_before_oom = FALSE; + sufficient_gen0_space_p = FALSE; + #ifdef MULTIPLE_HEAPS #ifdef HEAP_ANALYZE @@ -11344,14 +11425,26 @@ void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, alloc_context* acontext, heap_segment* seg, int align_const, int gen_number) { + bool loh_p = (gen_number > 0); + GCSpinLock* msl = loh_p ? &more_space_lock_loh : &more_space_lock_soh; + size_t aligned_min_obj_size = Align(min_obj_size, align_const); - //probably should pass seg==0 for free lists. if (seg) { assert (heap_segment_used (seg) <= heap_segment_committed (seg)); } +#ifdef MULTIPLE_HEAPS + if (gen_number == 0) + { + if (!gen0_allocated_after_gc_p) + { + gen0_allocated_after_gc_p = true; + } + } +#endif //MULTIPLE_HEAPS + dprintf (3, ("Expanding segment allocation [%Ix, %Ix[", (size_t)start, (size_t)start + limit_size - aligned_min_obj_size)); @@ -11372,21 +11465,24 @@ void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, } acontext->alloc_ptr = start; } - else - { - // If the next alloc context is right up against the current one it means we are absorbing the min - // object, so need to account for that. - acontext->alloc_bytes += (start - acontext->alloc_limit); - } - + else + { + if (gen_number == 0) + { + size_t pad_size = Align (min_obj_size, align_const); + make_unused_array (acontext->alloc_ptr, pad_size); + dprintf (3, ("contigous ac: making min obj gap %Ix->%Ix(%Id)", + acontext->alloc_ptr, (acontext->alloc_ptr + pad_size), pad_size)); + acontext->alloc_ptr += pad_size; + } + } acontext->alloc_limit = (start + limit_size - aligned_min_obj_size); acontext->alloc_bytes += limit_size - ((gen_number < max_generation + 1) ? 
aligned_min_obj_size : 0); #ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING - if (g_fEnableARM) + if (g_fEnableAppDomainMonitoring) { - AppDomain* alloc_appdomain = GetAppDomain(); - alloc_appdomain->RecordAllocBytes (limit_size, heap_number); + GCToEEInterface::RecordAllocatedBytesForHeap(limit_size, heap_number); } #endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING @@ -11425,9 +11521,8 @@ void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, if ((seg == 0) || (start - plug_skew + limit_size) <= heap_segment_used (seg)) { - dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear memory(1)", heap_number)); - add_saved_spinlock_info (me_release, mt_clr_mem); - leave_spin_lock (&more_space_lock); + add_saved_spinlock_info (loh_p, me_release, mt_clr_mem); + leave_spin_lock (msl); dprintf (3, ("clearing memory at %Ix for %d bytes", (start - plug_skew), limit_size)); memclr (start - plug_skew, limit_size); } @@ -11436,9 +11531,9 @@ void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, uint8_t* used = heap_segment_used (seg); heap_segment_used (seg) = start + limit_size - plug_skew; - dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear memory", heap_number)); - add_saved_spinlock_info (me_release, mt_clr_mem); - leave_spin_lock (&more_space_lock); + add_saved_spinlock_info (loh_p, me_release, mt_clr_mem); + leave_spin_lock (msl); + if ((start - plug_skew) < used) { if (used != saved_used) @@ -11481,19 +11576,36 @@ void gc_heap::adjust_limit_clr (uint8_t* start, size_t limit_size, //verify_mem_cleared (start - plug_skew, limit_size); } -/* in order to make the allocator faster, allocate returns a - * 0 filled object. Care must be taken to set the allocation limit to the - * allocation pointer after gc - */ +size_t gc_heap::new_allocation_limit (size_t size, size_t physical_limit, int gen_number) +{ + dynamic_data* dd = dynamic_data_of (gen_number); + ptrdiff_t new_alloc = dd_new_allocation (dd); + assert (new_alloc == (ptrdiff_t)Align (new_alloc, + get_alignment_constant (!(gen_number == (max_generation+1))))); -size_t gc_heap::limit_from_size (size_t size, size_t room, int gen_number, + ptrdiff_t logical_limit = max (new_alloc, (ptrdiff_t)size); + size_t limit = min (logical_limit, (ptrdiff_t)physical_limit); + assert (limit == Align (limit, get_alignment_constant (!(gen_number == (max_generation+1))))); + dd_new_allocation (dd) = (new_alloc - limit); + return limit; +} + +size_t gc_heap::limit_from_size (size_t size, size_t physical_limit, int gen_number, int align_const) { - size_t new_limit = new_allocation_limit ((size + Align (min_obj_size, align_const)), - min (room,max (size + Align (min_obj_size, align_const), - ((gen_number < max_generation+1) ? - allocation_quantum : - 0))), + size_t padded_size = size + Align (min_obj_size, align_const); + // for LOH this is not true...we could select a physical_limit that's exactly the same + // as size. + assert ((gen_number != 0) || (physical_limit >= padded_size)); + size_t min_size_to_allocate = ((gen_number == 0) ? allocation_quantum : 0); + + // For SOH if the size asked for is very small, we want to allocate more than + // just what's asked for if possible. 
+ size_t desired_size_to_allocate = max (padded_size, min_size_to_allocate); + size_t new_physical_limit = min (physical_limit, desired_size_to_allocate); + + size_t new_limit = new_allocation_limit (padded_size, + new_physical_limit, gen_number); assert (new_limit >= (size + Align (min_obj_size, align_const))); dprintf (100, ("requested to allocate %Id bytes, actual size is %Id", size, new_limit)); @@ -11787,10 +11899,22 @@ BOOL gc_heap::short_on_end_of_seg (int gen_number, UNREFERENCED_PARAMETER(gen_number); uint8_t* allocated = heap_segment_allocated(seg); - return (!a_size_fit_p (end_space_after_gc(), - allocated, - heap_segment_reserved (seg), - align_const)); + BOOL sufficient_p = a_size_fit_p (end_space_after_gc(), + allocated, + heap_segment_reserved (seg), + align_const); + + if (!sufficient_p) + { + if (sufficient_gen0_space_p) + { + dprintf (GTC_LOG, ("gen0 has enough free space")); + } + + sufficient_p = sufficient_gen0_space_p; + } + + return !sufficient_p; } #ifdef _MSC_VER @@ -11884,10 +12008,9 @@ void gc_heap::bgc_loh_alloc_clr (uint8_t* alloc_start, make_unused_array (alloc_start, size); #ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING - if (g_fEnableARM) + if (g_fEnableAppDomainMonitoring) { - AppDomain* alloc_appdomain = GetAppDomain(); - alloc_appdomain->RecordAllocBytes (size, heap_number); + GCToEEInterface::RecordAllocatedBytesForHeap(size, heap_number); } #endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING @@ -11938,8 +12061,8 @@ void gc_heap::bgc_loh_alloc_clr (uint8_t* alloc_start, #endif //VERIFY_HEAP dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear large obj", heap_number)); - add_saved_spinlock_info (me_release, mt_clr_large_mem); - leave_spin_lock (&more_space_lock); + add_saved_spinlock_info (true, me_release, mt_clr_large_mem); + leave_spin_lock (&more_space_lock_loh); memclr (alloc_start + size_to_skip, size_to_clear); bgc_alloc_lock->loh_alloc_set (alloc_start); @@ -11956,10 +12079,6 @@ BOOL gc_heap::a_fit_free_list_large_p (size_t size, alloc_context* acontext, int align_const) { -#ifdef BACKGROUND_GC - wait_for_background_planning (awr_loh_alloc_during_plan); -#endif //BACKGROUND_GC - BOOL can_fit = FALSE; int gen_number = max_generation + 1; generation* gen = generation_of (gen_number); @@ -11994,6 +12113,7 @@ BOOL gc_heap::a_fit_free_list_large_p (size_t size, { #ifdef BACKGROUND_GC cookie = bgc_alloc_lock->loh_alloc_set (free_list); + bgc_track_loh_alloc(); #endif //BACKGROUND_GC //unlink the free_item @@ -12078,9 +12198,10 @@ BOOL gc_heap::a_fit_segment_end_p (int gen_number, size_t pad = Align (min_obj_size, align_const); #ifdef FEATURE_LOH_COMPACTION + size_t loh_pad = Align (loh_padding_obj_size, align_const); if (gen_number == (max_generation + 1)) { - pad += Align (loh_padding_obj_size, align_const); + pad += loh_pad; } #endif //FEATURE_LOH_COMPACTION @@ -12119,6 +12240,7 @@ found_fit: if (gen_number != 0) { cookie = bgc_alloc_lock->loh_alloc_set (allocated); + bgc_track_loh_alloc(); } #endif //BACKGROUND_GC @@ -12127,7 +12249,6 @@ found_fit: #ifdef FEATURE_LOH_COMPACTION if (gen_number == (max_generation + 1)) { - size_t loh_pad = Align (loh_padding_obj_size, align_const); make_unused_array (old_alloc, loh_pad); old_alloc += loh_pad; allocated += loh_pad; @@ -12173,25 +12294,30 @@ BOOL gc_heap::loh_a_fit_segment_end_p (int gen_number, while (seg) { - if (a_fit_segment_end_p (gen_number, seg, (size - Align (min_obj_size, align_const)), - acontext, align_const, commit_failed_p)) +#ifdef BACKGROUND_GC + if (seg->flags & heap_segment_flags_loh_delete) { - 
acontext->alloc_limit += Align (min_obj_size, align_const); - can_allocate_p = TRUE; - break; + dprintf (3, ("h%d skipping seg %Ix to be deleted", heap_number, (size_t)seg)); } else +#endif //BACKGROUND_GC { - if (*commit_failed_p) + if (a_fit_segment_end_p (gen_number, seg, (size - Align (min_obj_size, align_const)), + acontext, align_const, commit_failed_p)) { - *oom_r = oom_cant_commit; + acontext->alloc_limit += Align (min_obj_size, align_const); + can_allocate_p = TRUE; break; } - else + + if (*commit_failed_p) { - seg = heap_segment_next_rw (seg); + *oom_r = oom_cant_commit; + break; } } + + seg = heap_segment_next_rw (seg); } return can_allocate_p; @@ -12199,28 +12325,28 @@ BOOL gc_heap::loh_a_fit_segment_end_p (int gen_number, #ifdef BACKGROUND_GC inline -void gc_heap::wait_for_background (alloc_wait_reason awr) +void gc_heap::wait_for_background (alloc_wait_reason awr, bool loh_p) { + GCSpinLock* msl = loh_p ? &more_space_lock_loh : &more_space_lock_soh; + dprintf (2, ("BGC is already in progress, waiting for it to finish")); - dprintf (SPINLOCK_LOG, ("[%d]Lmsl to wait for bgc done", heap_number)); - add_saved_spinlock_info (me_release, mt_wait_bgc); - leave_spin_lock (&more_space_lock); + add_saved_spinlock_info (loh_p, me_release, mt_wait_bgc); + leave_spin_lock (msl); background_gc_wait (awr); - enter_spin_lock (&more_space_lock); - add_saved_spinlock_info (me_acquire, mt_wait_bgc); - dprintf (SPINLOCK_LOG, ("[%d]Emsl after waiting for bgc done", heap_number)); + enter_spin_lock (msl); + add_saved_spinlock_info (loh_p, me_acquire, mt_wait_bgc); } -void gc_heap::wait_for_bgc_high_memory (alloc_wait_reason awr) +void gc_heap::wait_for_bgc_high_memory (alloc_wait_reason awr, bool loh_p) { if (recursive_gc_sync::background_running_p()) { uint32_t memory_load; get_memory_info (&memory_load); - if (memory_load >= 95) + if (memory_load >= m_high_memory_load_th) { dprintf (GTC_LOG, ("high mem - wait for BGC to finish, wait reason: %d", awr)); - wait_for_background (awr); + wait_for_background (awr, loh_p); } } } @@ -12232,7 +12358,7 @@ void gc_heap::wait_for_bgc_high_memory (alloc_wait_reason awr) BOOL gc_heap::trigger_ephemeral_gc (gc_reason gr) { #ifdef BACKGROUND_GC - wait_for_bgc_high_memory (awr_loh_oos_bgc); + wait_for_bgc_high_memory (awr_loh_oos_bgc, false); #endif //BACKGROUND_GC BOOL did_full_compact_gc = FALSE; @@ -12242,9 +12368,8 @@ BOOL gc_heap::trigger_ephemeral_gc (gc_reason gr) vm_heap->GarbageCollectGeneration(max_generation - 1, gr); #ifdef MULTIPLE_HEAPS - enter_spin_lock (&more_space_lock); - add_saved_spinlock_info (me_acquire, mt_t_eph_gc); - dprintf (SPINLOCK_LOG, ("[%d]Emsl after a GC", heap_number)); + enter_spin_lock (&more_space_lock_soh); + add_saved_spinlock_info (false, me_acquire, mt_t_eph_gc); #endif //MULTIPLE_HEAPS size_t current_full_compact_gc_count = get_full_compact_gc_count(); @@ -12301,15 +12426,13 @@ BOOL gc_heap::allocate_small (int gen_number, background_soh_alloc_count++; if ((background_soh_alloc_count % bgc_alloc_spin_count) == 0) { - add_saved_spinlock_info (me_release, mt_alloc_small); - dprintf (SPINLOCK_LOG, ("[%d]spin Lmsl", heap_number)); - leave_spin_lock (&more_space_lock); - bool cooperative_mode = enable_preemptive (); + add_saved_spinlock_info (false, me_release, mt_alloc_small); + leave_spin_lock (&more_space_lock_soh); + bool cooperative_mode = enable_preemptive(); GCToOSInterface::Sleep (bgc_alloc_spin); disable_preemptive (cooperative_mode); - enter_spin_lock (&more_space_lock); - add_saved_spinlock_info (me_acquire, 
mt_alloc_small); - dprintf (SPINLOCK_LOG, ("[%d]spin Emsl", heap_number)); + enter_spin_lock (&more_space_lock_soh); + add_saved_spinlock_info (false, me_acquire, mt_alloc_small); } else { @@ -12382,35 +12505,29 @@ BOOL gc_heap::allocate_small (int gen_number, can_use_existing_p = soh_try_fit (gen_number, size, acontext, align_const, &commit_failed_p, &short_seg_end_p); - if (short_seg_end_p) + + if (can_use_existing_p) + { + soh_alloc_state = a_state_can_allocate; + } +#ifdef MULTIPLE_HEAPS + else if (gen0_allocated_after_gc_p) + { + // some other threads already grabbed the more space lock and allocated + // so we should attempt an ephemeral GC again. + soh_alloc_state = a_state_trigger_ephemeral_gc; + } +#endif //MULTIPLE_HEAPS + else if (short_seg_end_p) { soh_alloc_state = a_state_cant_allocate; oom_r = oom_budget; } - else + else { - if (can_use_existing_p) - { - soh_alloc_state = a_state_can_allocate; - } - else - { -#ifdef MULTIPLE_HEAPS - if (!commit_failed_p) - { - // some other threads already grabbed the more space lock and allocated - // so we should attempt an ephemeral GC again. - assert (heap_segment_allocated (ephemeral_heap_segment) < alloc_allocated); - soh_alloc_state = a_state_trigger_ephemeral_gc; - } - else -#endif //MULTIPLE_HEAPS - { - assert (commit_failed_p); - soh_alloc_state = a_state_cant_allocate; - oom_r = oom_cant_commit; - } - } + assert (commit_failed_p); + soh_alloc_state = a_state_cant_allocate; + oom_r = oom_cant_commit; } break; } @@ -12419,7 +12536,7 @@ BOOL gc_heap::allocate_small (int gen_number, BOOL bgc_in_progress_p = FALSE; BOOL did_full_compacting_gc = FALSE; - bgc_in_progress_p = check_and_wait_for_bgc (awr_gen0_oos_bgc, &did_full_compacting_gc); + bgc_in_progress_p = check_and_wait_for_bgc (awr_gen0_oos_bgc, &did_full_compacting_gc, false); soh_alloc_state = (did_full_compacting_gc ? a_state_try_fit_after_cg : a_state_try_fit_after_bgc); @@ -12447,45 +12564,41 @@ BOOL gc_heap::allocate_small (int gen_number, bgc_in_progress_p = recursive_gc_sync::background_running_p(); #endif //BACKGROUND_GC - if (short_seg_end_p) + if (can_use_existing_p) { - soh_alloc_state = (bgc_in_progress_p ? - a_state_check_and_wait_for_bgc : - a_state_trigger_full_compact_gc); - - if (fgn_maxgen_percent) - { - dprintf (2, ("FGN: doing last GC before we throw OOM")); - send_full_gc_notification (max_generation, FALSE); - } + soh_alloc_state = a_state_can_allocate; } else { - if (can_use_existing_p) + if (short_seg_end_p) { - soh_alloc_state = a_state_can_allocate; - } - else - { -#ifdef MULTIPLE_HEAPS - if (!commit_failed_p) + if (should_expand_in_full_gc) { - // some other threads already grabbed the more space lock and allocated - // so we should attempt an ephemeral GC again. - assert (heap_segment_allocated (ephemeral_heap_segment) < alloc_allocated); - soh_alloc_state = a_state_trigger_ephemeral_gc; + dprintf (2, ("gen1 GC wanted to expand!")); + soh_alloc_state = a_state_trigger_full_compact_gc; } else -#endif //MULTIPLE_HEAPS { - soh_alloc_state = a_state_trigger_full_compact_gc; - if (fgn_maxgen_percent) - { - dprintf (2, ("FGN: failed to commit, doing full compacting GC")); - send_full_gc_notification (max_generation, FALSE); - } + soh_alloc_state = (bgc_in_progress_p ? 
+ a_state_check_and_wait_for_bgc : + a_state_trigger_full_compact_gc); } } + else if (commit_failed_p) + { + soh_alloc_state = a_state_trigger_full_compact_gc; + } + else + { +#ifdef MULTIPLE_HEAPS + // some other threads already grabbed the more space lock and allocated + // so we should attemp an ephemeral GC again. + assert (gen0_allocated_after_gc_p); + soh_alloc_state = a_state_trigger_ephemeral_gc; +#else //MULTIPLE_HEAPS + assert (!"shouldn't get here"); +#endif //MULTIPLE_HEAPS + } } } break; @@ -12523,9 +12636,15 @@ BOOL gc_heap::allocate_small (int gen_number, } case a_state_trigger_full_compact_gc: { + if (fgn_maxgen_percent) + { + dprintf (2, ("FGN: SOH doing last GC before we throw OOM")); + send_full_gc_notification (max_generation, FALSE); + } + BOOL got_full_compacting_gc = FALSE; - got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r); + got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r, false); soh_alloc_state = (got_full_compacting_gc ? a_state_try_fit_after_cg : a_state_cant_allocate); break; } @@ -12547,9 +12666,8 @@ exit: heap_segment_allocated (ephemeral_heap_segment), heap_segment_reserved (ephemeral_heap_segment)); - dprintf (SPINLOCK_LOG, ("[%d]Lmsl for oom", heap_number)); - add_saved_spinlock_info (me_release, mt_alloc_small_cant); - leave_spin_lock (&more_space_lock); + add_saved_spinlock_info (false, me_release, mt_alloc_small_cant); + leave_spin_lock (&more_space_lock_soh); } return (soh_alloc_state == a_state_can_allocate); @@ -12557,27 +12675,28 @@ exit: #ifdef BACKGROUND_GC inline -void gc_heap::wait_for_background_planning (alloc_wait_reason awr) +void gc_heap::bgc_track_loh_alloc() { - while (current_c_gc_state == c_gc_state_planning) + if (current_c_gc_state == c_gc_state_planning) { - dprintf (3, ("lh state planning, cannot allocate")); + Interlocked::Increment (&loh_alloc_thread_count); + dprintf (3, ("h%d: inc lc: %d", heap_number, loh_alloc_thread_count)); + } +} - dprintf (SPINLOCK_LOG, ("[%d]Lmsl to wait for bgc plan", heap_number)); - add_saved_spinlock_info (me_release, mt_wait_bgc_plan); - leave_spin_lock (&more_space_lock); - background_gc_wait_lh (awr); - enter_spin_lock (&more_space_lock); - add_saved_spinlock_info (me_acquire, mt_wait_bgc_plan); - dprintf (SPINLOCK_LOG, ("[%d]Emsl after waiting for bgc plan", heap_number)); +inline +void gc_heap::bgc_untrack_loh_alloc() +{ + if (current_c_gc_state == c_gc_state_planning) + { + Interlocked::Decrement (&loh_alloc_thread_count); + dprintf (3, ("h%d: dec lc: %d", heap_number, loh_alloc_thread_count)); } - assert ((current_c_gc_state == c_gc_state_free) || - (current_c_gc_state == c_gc_state_marking)); } BOOL gc_heap::bgc_loh_should_allocate() { - size_t min_gc_size = dd_min_size(dynamic_data_of (max_generation + 1)); + size_t min_gc_size = dd_min_size (dynamic_data_of (max_generation + 1)); if ((bgc_begin_loh_size + bgc_loh_size_increased) < (min_gc_size * 10)) { @@ -12673,7 +12792,8 @@ BOOL gc_heap::retry_full_compact_gc (size_t size) } BOOL gc_heap::check_and_wait_for_bgc (alloc_wait_reason awr, - BOOL* did_full_compact_gc) + BOOL* did_full_compact_gc, + bool loh_p) { BOOL bgc_in_progress = FALSE; *did_full_compact_gc = FALSE; @@ -12682,7 +12802,7 @@ BOOL gc_heap::check_and_wait_for_bgc (alloc_wait_reason awr, { bgc_in_progress = TRUE; size_t last_full_compact_gc_count = get_full_compact_gc_count(); - wait_for_background (awr); + wait_for_background (awr, loh_p); size_t current_full_compact_gc_count = get_full_compact_gc_count(); if (current_full_compact_gc_count > 
last_full_compact_gc_count) { @@ -12730,7 +12850,8 @@ BOOL gc_heap::loh_try_fit (int gen_number, } BOOL gc_heap::trigger_full_compact_gc (gc_reason gr, - oom_reason* oom_r) + oom_reason* oom_r, + bool loh_p) { BOOL did_full_compact_gc = FALSE; @@ -12745,11 +12866,12 @@ BOOL gc_heap::trigger_full_compact_gc (gc_reason gr, #ifdef BACKGROUND_GC if (recursive_gc_sync::background_running_p()) { - wait_for_background ((gr == reason_oos_soh) ? awr_gen0_oos_bgc : awr_loh_oos_bgc); + wait_for_background (((gr == reason_oos_soh) ? awr_gen0_oos_bgc : awr_loh_oos_bgc), loh_p); dprintf (2, ("waited for BGC - done")); } #endif //BACKGROUND_GC + GCSpinLock* msl = loh_p ? &more_space_lock_loh : &more_space_lock_soh; size_t current_full_compact_gc_count = get_full_compact_gc_count(); if (current_full_compact_gc_count > last_full_compact_gc_count) { @@ -12760,13 +12882,8 @@ BOOL gc_heap::trigger_full_compact_gc (gc_reason gr, } dprintf (3, ("h%d full GC", heap_number)); - vm_heap->GarbageCollectGeneration(max_generation, gr); -#ifdef MULTIPLE_HEAPS - enter_spin_lock (&more_space_lock); - dprintf (SPINLOCK_LOG, ("[%d]Emsl after full gc", heap_number)); - add_saved_spinlock_info (me_acquire, mt_t_full_gc); -#endif //MULTIPLE_HEAPS + trigger_gc_for_alloc (max_generation, gr, msl, loh_p, mt_t_full_gc); current_full_compact_gc_count = get_full_compact_gc_count(); @@ -12820,7 +12937,7 @@ BOOL gc_heap::allocate_large (int gen_number, int align_const) { #ifdef BACKGROUND_GC - if (recursive_gc_sync::background_running_p() && (current_c_gc_state != c_gc_state_planning)) + if (recursive_gc_sync::background_running_p()) { background_loh_alloc_count++; //if ((background_loh_alloc_count % bgc_alloc_spin_count_loh) == 0) @@ -12829,20 +12946,19 @@ BOOL gc_heap::allocate_large (int gen_number, { if (!bgc_alloc_spin_loh) { - add_saved_spinlock_info (me_release, mt_alloc_large); - dprintf (SPINLOCK_LOG, ("[%d]spin Lmsl loh", heap_number)); - leave_spin_lock (&more_space_lock); - bool cooperative_mode = enable_preemptive (); + add_saved_spinlock_info (true, me_release, mt_alloc_large); + leave_spin_lock (&more_space_lock_loh); + bool cooperative_mode = enable_preemptive(); GCToOSInterface::YieldThread (bgc_alloc_spin_loh); disable_preemptive (cooperative_mode); - enter_spin_lock (&more_space_lock); - add_saved_spinlock_info (me_acquire, mt_alloc_large); + enter_spin_lock (&more_space_lock_loh); + add_saved_spinlock_info (true, me_acquire, mt_alloc_large); dprintf (SPINLOCK_LOG, ("[%d]spin Emsl loh", heap_number)); } } else { - wait_for_background (awr_loh_alloc_during_bgc); + wait_for_background (awr_loh_alloc_during_bgc, true); } } } @@ -13024,13 +13140,7 @@ BOOL gc_heap::allocate_large (int gen_number, BOOL bgc_in_progress_p = FALSE; BOOL did_full_compacting_gc = FALSE; - if (fgn_maxgen_percent) - { - dprintf (2, ("FGN: failed to acquire seg, may need to do a full blocking GC")); - send_full_gc_notification (max_generation, FALSE); - } - - bgc_in_progress_p = check_and_wait_for_bgc (awr_loh_oos_bgc, &did_full_compacting_gc); + bgc_in_progress_p = check_and_wait_for_bgc (awr_loh_oos_bgc, &did_full_compacting_gc, true); loh_alloc_state = (!bgc_in_progress_p ? a_state_trigger_full_compact_gc : (did_full_compacting_gc ? 
@@ -13040,9 +13150,15 @@ BOOL gc_heap::allocate_large (int gen_number, } case a_state_trigger_full_compact_gc: { + if (fgn_maxgen_percent) + { + dprintf (2, ("FGN: LOH doing last GC before we throw OOM")); + send_full_gc_notification (max_generation, FALSE); + } + BOOL got_full_compacting_gc = FALSE; - got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r); + got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r, true); loh_alloc_state = (got_full_compacting_gc ? a_state_try_fit_after_cg : a_state_cant_allocate); assert ((loh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure)); break; @@ -13088,14 +13204,45 @@ exit: 0, 0); - add_saved_spinlock_info (me_release, mt_alloc_large_cant); - dprintf (SPINLOCK_LOG, ("[%d]Lmsl for loh oom", heap_number)); - leave_spin_lock (&more_space_lock); + add_saved_spinlock_info (true, me_release, mt_alloc_large_cant); + leave_spin_lock (&more_space_lock_loh); } return (loh_alloc_state == a_state_can_allocate); } +// BGC's final mark phase will acquire the msl, so release it here and re-acquire. +void gc_heap::trigger_gc_for_alloc (int gen_number, gc_reason gr, + GCSpinLock* msl, bool loh_p, + msl_take_state take_state) +{ +#ifdef BACKGROUND_GC + if (loh_p) + { + add_saved_spinlock_info (loh_p, me_release, take_state); + leave_spin_lock (msl); + } +#endif //BACKGROUND_GC + + vm_heap->GarbageCollectGeneration (gen_number, gr); + +#ifdef MULTIPLE_HEAPS + if (!loh_p) + { + enter_spin_lock (msl); + add_saved_spinlock_info (loh_p, me_acquire, take_state); + } +#endif //MULTIPLE_HEAPS + +#ifdef BACKGROUND_GC + if (loh_p) + { + enter_spin_lock (msl); + add_saved_spinlock_info (loh_p, me_acquire, take_state); + } +#endif //BACKGROUND_GC +} + int gc_heap::try_allocate_more_space (alloc_context* acontext, size_t size, int gen_number) { @@ -13105,11 +13252,14 @@ int gc_heap::try_allocate_more_space (alloc_context* acontext, size_t size, return -1; } + bool loh_p = (gen_number > 0); + GCSpinLock* msl = loh_p ? &more_space_lock_loh : &more_space_lock_soh; + #ifdef SYNCHRONIZATION_STATS int64_t msl_acquire_start = GCToOSInterface::QueryPerformanceCounter(); #endif //SYNCHRONIZATION_STATS - enter_spin_lock (&more_space_lock); - add_saved_spinlock_info (me_acquire, mt_try_alloc); + enter_spin_lock (msl); + add_saved_spinlock_info (loh_p, me_acquire, mt_try_alloc); dprintf (SPINLOCK_LOG, ("[%d]Emsl for alloc", heap_number)); #ifdef SYNCHRONIZATION_STATS int64_t msl_acquire = GCToOSInterface::QueryPerformanceCounter() - msl_acquire_start; @@ -13166,7 +13316,7 @@ int gc_heap::try_allocate_more_space (alloc_context* acontext, size_t size, } #ifdef BACKGROUND_GC - wait_for_bgc_high_memory (awr_gen0_alloc); + wait_for_bgc_high_memory (awr_gen0_alloc, loh_p); #endif //BACKGROUND_GC #ifdef SYNCHRONIZATION_STATS @@ -13176,12 +13326,8 @@ int gc_heap::try_allocate_more_space (alloc_context* acontext, size_t size, if (!settings.concurrent || (gen_number == 0)) { - vm_heap->GarbageCollectGeneration (0, ((gen_number == 0) ? reason_alloc_soh : reason_alloc_loh)); -#ifdef MULTIPLE_HEAPS - enter_spin_lock (&more_space_lock); - add_saved_spinlock_info (me_acquire, mt_try_budget); - dprintf (SPINLOCK_LOG, ("[%d]Emsl out budget", heap_number)); -#endif //MULTIPLE_HEAPS + trigger_gc_for_alloc (0, ((gen_number == 0) ? 
reason_alloc_soh : reason_alloc_loh), + msl, loh_p, mt_try_budget); } } @@ -13331,39 +13477,42 @@ try_again: org_hp->alloc_context_count--; max_hp->alloc_context_count++; acontext->set_alloc_heap(GCHeap::GetHeap(max_hp->heap_number)); - if (GCToOSInterface::CanEnableGCCPUGroups()) - { //only set ideal processor when max_hp and org_hp are in the same cpu - //group. DO NOT MOVE THREADS ACROSS CPU GROUPS - uint16_t org_gn = heap_select::find_cpu_group_from_heap_no(org_hp->heap_number); - uint16_t max_gn = heap_select::find_cpu_group_from_heap_no(max_hp->heap_number); - if (org_gn == max_gn) //only set within CPU group, so SetThreadIdealProcessor is enough - { - uint16_t group_proc_no = heap_select::find_group_proc_from_heap_no(max_hp->heap_number); + if (!gc_thread_no_affinitize_p) + { + if (GCToOSInterface::CanEnableGCCPUGroups()) + { //only set ideal processor when max_hp and org_hp are in the same cpu + //group. DO NOT MOVE THREADS ACROSS CPU GROUPS + uint16_t org_gn = heap_select::find_cpu_group_from_heap_no(org_hp->heap_number); + uint16_t max_gn = heap_select::find_cpu_group_from_heap_no(max_hp->heap_number); + if (org_gn == max_gn) //only set within CPU group, so SetThreadIdealProcessor is enough + { + uint16_t group_proc_no = heap_select::find_group_proc_from_heap_no(max_hp->heap_number); + + GCThreadAffinity affinity; + affinity.Processor = group_proc_no; + affinity.Group = org_gn; + if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity)) + { + dprintf (3, ("Failed to set the ideal processor and group for heap %d.", + org_hp->heap_number)); + } + } + } + else + { + uint16_t proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number); GCThreadAffinity affinity; - affinity.Processor = group_proc_no; - affinity.Group = org_gn; + affinity.Processor = proc_no; + affinity.Group = GCThreadAffinity::None; + if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity)) { - dprintf (3, ("Failed to set the ideal processor and group for heap %d.", + dprintf (3, ("Failed to set the ideal processor for heap %d.", org_hp->heap_number)); } } } - else - { - uint16_t proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number); - - GCThreadAffinity affinity; - affinity.Processor = proc_no; - affinity.Group = GCThreadAffinity::None; - - if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity)) - { - dprintf (3, ("Failed to set the ideal processor for heap %d.", - org_hp->heap_number)); - } - } dprintf (3, ("Switching context %p (home heap %d) ", acontext, acontext->get_home_heap()->pGenGCHeap->heap_number)); @@ -13696,7 +13845,7 @@ uint8_t* gc_heap::allocate_in_older_generation (generation* gen, size_t size, allocator* gen_allocator = generation_allocator (gen); BOOL discard_p = gen_allocator->discard_if_no_fit_p (); - int pad_in_front = (old_loc != 0)? USE_PADDING_FRONT : 0; + int pad_in_front = ((old_loc != 0) && ((from_gen_number+1) != max_generation)) ? USE_PADDING_FRONT : 0; size_t real_size = size + Align (min_obj_size); if (pad_in_front) @@ -13729,6 +13878,7 @@ uint8_t* gc_heap::allocate_in_older_generation (generation* gen, size_t size, remove_gen_free (gen->gen_num, free_list_size); adjust_limit (free_list, free_list_size, gen, from_gen_number+1); + generation_allocate_end_seg_p (gen) = FALSE; goto finished; } // We do first fit on bucket 0 because we are not guaranteed to find a fit there. 
@@ -13751,7 +13901,6 @@ uint8_t* gc_heap::allocate_in_older_generation (generation* gen, size_t size, sz_list = sz_list * 2; } //go back to the beginning of the segment list - generation_allocate_end_seg_p (gen) = TRUE; heap_segment* seg = heap_segment_rw (generation_start_segment (gen)); if (seg != generation_allocation_segment (gen)) { @@ -13768,6 +13917,7 @@ uint8_t* gc_heap::allocate_in_older_generation (generation* gen, size_t size, heap_segment_committed (seg) - heap_segment_plan_allocated (seg), gen, from_gen_number+1); + generation_allocate_end_seg_p (gen) = TRUE; // dformat (t, 3, "Expanding segment allocation"); heap_segment_plan_allocated (seg) = heap_segment_committed (seg); @@ -13784,6 +13934,7 @@ uint8_t* gc_heap::allocate_in_older_generation (generation* gen, size_t size, heap_segment_committed (seg) - heap_segment_plan_allocated (seg), gen, from_gen_number+1); + generation_allocate_end_seg_p (gen) = TRUE; heap_segment_plan_allocated (seg) = heap_segment_committed (seg); @@ -13945,7 +14096,7 @@ uint8_t* gc_heap::allocate_in_expanded_heap (generation* gen, size = Align (size); assert (size >= Align (min_obj_size)); - int pad_in_front = (old_loc != 0) ? USE_PADDING_FRONT : 0; + int pad_in_front = ((old_loc != 0) && (active_new_gen_number != max_generation)) ? USE_PADDING_FRONT : 0; if (consider_bestfit && use_bestfit) { @@ -14182,11 +14333,10 @@ uint8_t* gc_heap::allocate_in_condemned_generations (generation* gen, to_gen_number = from_gen_number + (settings.promotion ? 1 : 0); } - dprintf (3, ("aic gen%d: s: %Id, %d->%d, %Ix->%Ix", gen->gen_num, size, from_gen_number, - to_gen_number, generation_allocation_pointer(gen), generation_allocation_limit(gen))); - - int pad_in_front = (old_loc != 0) ? USE_PADDING_FRONT : 0; + dprintf (3, ("aic gen%d: s: %Id", gen->gen_num, size)); + int pad_in_front = ((old_loc != 0) && (to_gen_number != max_generation)) ? USE_PADDING_FRONT : 0; + if ((from_gen_number != -1) && (from_gen_number != (int)max_generation) && settings.promotion) { generation_condemned_allocated (generation_of (from_gen_number + (settings.promotion ? 1 : 0))) += size; @@ -14424,27 +14574,32 @@ inline int power (int x, int y) } int gc_heap::joined_generation_to_condemn (BOOL should_evaluate_elevation, - int n_initial, + int initial_gen, + int current_gen, BOOL* blocking_collection_p STRESS_HEAP_ARG(int n_original)) { - int n = n_initial; + int n = current_gen; #ifdef MULTIPLE_HEAPS - BOOL blocking_p = *blocking_collection_p; - if (!blocking_p) + BOOL joined_last_gc_before_oom = FALSE; + for (int i = 0; i < n_heaps; i++) { - for (int i = 0; i < n_heaps; i++) + if (g_heaps[i]->last_gc_before_oom) { - if (g_heaps[i]->last_gc_before_oom) - { - dprintf (GTC_LOG, ("h%d is setting blocking to TRUE", i)); - *blocking_collection_p = TRUE; - break; - } + dprintf (GTC_LOG, ("h%d is setting blocking to TRUE", i)); + joined_last_gc_before_oom = TRUE; + break; } } +#else + BOOL joined_last_gc_before_oom = last_gc_before_oom; #endif //MULTIPLE_HEAPS + if (joined_last_gc_before_oom && settings.pause_mode != pause_low_latency) + { + assert (*blocking_collection_p); + } + if (should_evaluate_elevation && (n == max_generation)) { dprintf (GTC_LOG, ("lock: %d(%d)", @@ -14475,11 +14630,50 @@ int gc_heap::joined_generation_to_condemn (BOOL should_evaluate_elevation, settings.elevation_locked_count = 0; } + if (provisional_mode_triggered && (n == max_generation)) + { + // There are a few cases where we should not reduce the generation. 
+ if ((initial_gen == max_generation) || (settings.reason == reason_alloc_loh)) + { + // If we are doing a full GC in the provisional mode, we always + // make it blocking because we don't want to get into a situation + // where foreground GCs are asking for a compacting full GC right away + // and not getting it. + dprintf (GTC_LOG, ("full GC induced, not reducing gen")); + *blocking_collection_p = TRUE; + } + else if (should_expand_in_full_gc || joined_last_gc_before_oom) + { + dprintf (GTC_LOG, ("need full blocking GCs to expand heap or avoid OOM, not reducing gen")); + assert (*blocking_collection_p); + } + else + { + dprintf (GTC_LOG, ("reducing gen in PM: %d->%d->%d", initial_gen, n, (max_generation - 1))); + n = max_generation - 1; + } + } + + if (should_expand_in_full_gc) + { + should_expand_in_full_gc = FALSE; + } + + if ((n == max_generation) && (*blocking_collection_p == FALSE)) + { + // If we are doing a gen2 we should reset elevation regardless and let the gen2 + // decide if we should lock again or in the bgc case by design we will not retract + // gen1 start. + settings.should_lock_elevation = FALSE; + settings.elevation_locked_count = 0; + dprintf (1, ("doing bgc, reset elevation")); + } + #ifdef STRESS_HEAP #ifdef BACKGROUND_GC // We can only do Concurrent GC Stress if the caller did not explicitly ask for all // generations to be collected, - + // // [LOCALGC TODO] STRESS_HEAP is not defined for a standalone GC so there are multiple // things that need to be fixed in this code block. if (n_original != max_generation && @@ -14789,21 +14983,24 @@ int gc_heap::generation_to_condemn (int n_initial, local_condemn_reasons->set_condition (gen_low_ephemeral_p); dprintf (GTC_LOG, ("h%d: low eph", heap_number)); + if (!provisional_mode_triggered) + { #ifdef BACKGROUND_GC - if (!gc_can_use_concurrent || (generation_free_list_space (generation_of (max_generation)) == 0)) + if (!gc_can_use_concurrent || (generation_free_list_space (generation_of (max_generation)) == 0)) #endif //BACKGROUND_GC - { - //It is better to defragment first if we are running out of space for - //the ephemeral generation but we have enough fragmentation to make up for it - //in the non ephemeral generation. Essentially we are trading a gen2 for - // having to expand heap in ephemeral collections. - if (dt_high_frag_p (tuning_deciding_condemned_gen, - max_generation - 1, - TRUE)) { - high_fragmentation = TRUE; - local_condemn_reasons->set_condition (gen_max_high_frag_e_p); - dprintf (GTC_LOG, ("heap%d: gen1 frag", heap_number)); + //It is better to defragment first if we are running out of space for + //the ephemeral generation but we have enough fragmentation to make up for it + //in the non ephemeral generation. Essentially we are trading a gen2 for + // having to expand heap in ephemeral collections. 
+ if (dt_high_frag_p (tuning_deciding_condemned_gen, + max_generation - 1, + TRUE)) + { + high_fragmentation = TRUE; + local_condemn_reasons->set_condition (gen_max_high_frag_e_p); + dprintf (GTC_LOG, ("heap%d: gen1 frag", heap_number)); + } } } } @@ -14919,10 +15116,6 @@ int gc_heap::generation_to_condemn (int n_initial, { dprintf (GTC_LOG, ("h%d: expand_in_full - BLOCK", heap_number)); *blocking_collection_p = TRUE; - if (!check_only_p) - { - should_expand_in_full_gc = FALSE; - } evaluate_elevation = FALSE; n = max_generation; local_condemn_reasons->set_condition (gen_expand_fullgc_p); @@ -14935,7 +15128,9 @@ int gc_heap::generation_to_condemn (int n_initial, *blocking_collection_p = TRUE; if ((local_settings->reason == reason_oos_loh) || (local_settings->reason == reason_alloc_loh)) + { evaluate_elevation = FALSE; + } local_condemn_reasons->set_condition (gen_before_oom); } @@ -14963,7 +15158,7 @@ int gc_heap::generation_to_condemn (int n_initial, } } - if (evaluate_elevation && (low_ephemeral_space || high_memory_load || v_high_memory_load)) + if (!provisional_mode_triggered && evaluate_elevation && (low_ephemeral_space || high_memory_load || v_high_memory_load)) { *elevation_requested_p = TRUE; #ifdef BIT64 @@ -15018,7 +15213,7 @@ int gc_heap::generation_to_condemn (int n_initial, #endif // BIT64 } - if ((n == (max_generation - 1)) && (n_alloc < (max_generation -1))) + if (!provisional_mode_triggered && (n == (max_generation - 1)) && (n_alloc < (max_generation -1))) { dprintf (GTC_LOG, ("h%d: budget %d, check 2", heap_number, n_alloc)); @@ -15031,7 +15226,7 @@ int gc_heap::generation_to_condemn (int n_initial, } //figure out if max_generation is too fragmented -> blocking collection - if (n == max_generation) + if (!provisional_mode_triggered && (n == max_generation)) { if (dt_high_frag_p (tuning_deciding_condemned_gen, n)) { @@ -15299,6 +15494,17 @@ void gc_heap::gc1() int n = settings.condemned_generation; + if (settings.reason == reason_pm_full_gc) + { + assert (n == max_generation); + init_records(); + + gen_to_condemn_tuning* local_condemn_reasons = &(get_gc_data_per_heap()->gen_to_condemn_reasons); + local_condemn_reasons->init(); + local_condemn_reasons->set_gen (gen_initial, n); + local_condemn_reasons->set_gen (gen_final_per_heap, n); + } + update_collection_counts (); #ifdef BACKGROUND_GC @@ -15769,6 +15975,7 @@ void gc_heap::gc1() #endif //FEATURE_LOH_COMPACTION fire_pevents(); + pm_full_gc_init_or_clear(); gc_t_join.restart(); } @@ -15791,6 +15998,8 @@ void gc_heap::gc1() do_post_gc(); } + pm_full_gc_init_or_clear(); + #ifdef BACKGROUND_GC recover_bgc_settings(); #endif //BACKGROUND_GC @@ -16485,6 +16694,9 @@ void gc_heap::allocate_for_no_gc_after_gc() void gc_heap::init_records() { + // An option is to move this to be after we figure out which gen to condemn so we don't + // need to clear some generations' data 'cause we know they don't change, but that also means + // we can't simply call memset here. 
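// (The per-generation size_before / free-list snapshots and the fgm_result reset
// that previously lived in garbage_collect are folded into init_records below.)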
memset (&gc_data_per_heap, 0, sizeof (gc_data_per_heap)); gc_data_per_heap.heap_index = heap_number; if (heap_number == 0) @@ -16493,9 +16705,68 @@ void gc_heap::init_records() #ifdef GC_CONFIG_DRIVEN memset (interesting_data_per_gc, 0, sizeof (interesting_data_per_gc)); #endif //GC_CONFIG_DRIVEN + memset (&fgm_result, 0, sizeof (fgm_result)); + + for (int i = 0; i <= (max_generation + 1); i++) + { + gc_data_per_heap.gen_data[i].size_before = generation_size (i); + generation* gen = generation_of (i); + gc_data_per_heap.gen_data[i].free_list_space_before = generation_free_list_space (gen); + gc_data_per_heap.gen_data[i].free_obj_space_before = generation_free_obj_space (gen); + } + + sufficient_gen0_space_p = FALSE; +#if defined (_DEBUG) && defined (VERIFY_HEAP) + verify_pinned_queue_p = FALSE; +#endif // _DEBUG && VERIFY_HEAP } -int gc_heap::garbage_collect (int n) +void gc_heap::pm_full_gc_init_or_clear() +{ + // This means the next GC will be a full blocking GC and we need to init. + if (settings.condemned_generation == (max_generation - 1)) + { + if (pm_trigger_full_gc) + { +#ifdef MULTIPLE_HEAPS + do_post_gc(); +#endif //MULTIPLE_HEAPS + dprintf (GTC_LOG, ("init for PM triggered full GC")); + uint32_t saved_entry_memory_load = settings.entry_memory_load; + settings.init_mechanisms(); + settings.reason = reason_pm_full_gc; + settings.condemned_generation = max_generation; + settings.entry_memory_load = saved_entry_memory_load; + // Can't assert this since we only check at the end of gen2 GCs, + // during gen1 the memory load could have already dropped. + // Although arguably we should just turn off PM then... + //assert (settings.entry_memory_load >= high_memory_load_th); + assert (settings.entry_memory_load > 0); + settings.gc_index += 1; + do_pre_gc(); + } + } + // This means we are in the progress of a full blocking GC triggered by + // this PM mode. 
+ else if (settings.reason == reason_pm_full_gc) + { + assert (settings.condemned_generation == max_generation); + assert (pm_trigger_full_gc); + pm_trigger_full_gc = false; + + dprintf (GTC_LOG, ("PM triggered full GC done")); + } +} + +void gc_heap::garbage_collect_pm_full_gc() +{ + assert (settings.condemned_generation == max_generation); + assert (settings.reason == reason_pm_full_gc); + assert (!settings.concurrent); + gc1(); +} + +void gc_heap::garbage_collect (int n) { //reset the number of alloc contexts alloc_contexts_used = 0; @@ -16539,13 +16810,10 @@ int gc_heap::garbage_collect (int n) } init_records(); - memset (&fgm_result, 0, sizeof (fgm_result)); settings.reason = gc_trigger_reason; - verify_pinned_queue_p = FALSE; - #if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) - num_pinned_objects = 0; + num_pinned_objects = 0; #endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE #ifdef STRESS_HEAP @@ -16560,97 +16828,93 @@ int gc_heap::garbage_collect (int n) //align all heaps on the max generation to condemn dprintf (3, ("Joining for max generation to condemn")); condemned_generation_num = generation_to_condemn (n, - &blocking_collection, - &elevation_requested, - FALSE); + &blocking_collection, + &elevation_requested, + FALSE); gc_t_join.join(this, gc_join_generation_determined); if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { -#ifdef MULTIPLE_HEAPS #if !defined(SEG_MAPPING_TABLE) && !defined(FEATURE_BASICFREEZE) //delete old slots from the segment table seg_table->delete_old_slots(); #endif //!SEG_MAPPING_TABLE && !FEATURE_BASICFREEZE + +#ifdef MULTIPLE_HEAPS for (int i = 0; i < n_heaps; i++) { - //copy the card and brick tables - if (g_gc_card_table != g_heaps[i]->card_table) - { - g_heaps[i]->copy_brick_card_table(); - } + gc_heap* hp = g_heaps[i]; + // check for card table growth + if (g_gc_card_table != hp->card_table) + hp->copy_brick_card_table(); - g_heaps[i]->rearrange_large_heap_segments(); - if (!recursive_gc_sync::background_running_p()) - { - g_heaps[i]->rearrange_small_heap_segments(); - } - } -#else //MULTIPLE_HEAPS + hp->rearrange_large_heap_segments(); #ifdef BACKGROUND_GC - //delete old slots from the segment table -#if !defined(SEG_MAPPING_TABLE) && !defined(FEATURE_BASICFREEZE) - seg_table->delete_old_slots(); -#endif //!SEG_MAPPING_TABLE && !FEATURE_BASICFREEZE - rearrange_large_heap_segments(); + hp->background_delay_delete_loh_segments(); if (!recursive_gc_sync::background_running_p()) - { - rearrange_small_heap_segments(); - } + hp->rearrange_small_heap_segments(); #endif //BACKGROUND_GC - // check for card table growth + } +#else //MULTIPLE_HEAPS if (g_gc_card_table != card_table) copy_brick_card_table(); + rearrange_large_heap_segments(); +#ifdef BACKGROUND_GC + background_delay_delete_loh_segments(); + if (!recursive_gc_sync::background_running_p()) + rearrange_small_heap_segments(); +#endif //BACKGROUND_GC #endif //MULTIPLE_HEAPS - BOOL should_evaluate_elevation = FALSE; - BOOL should_do_blocking_collection = FALSE; + BOOL should_evaluate_elevation = FALSE; + BOOL should_do_blocking_collection = FALSE; #ifdef MULTIPLE_HEAPS - int gen_max = condemned_generation_num; - for (int i = 0; i < n_heaps; i++) - { - if (gen_max < g_heaps[i]->condemned_generation_num) - gen_max = g_heaps[i]->condemned_generation_num; - if ((!should_evaluate_elevation) && (g_heaps[i]->elevation_requested)) - should_evaluate_elevation = TRUE; - if ((!should_do_blocking_collection) && (g_heaps[i]->blocking_collection)) - should_do_blocking_collection = TRUE; - } + 
int gen_max = condemned_generation_num; + for (int i = 0; i < n_heaps; i++) + { + if (gen_max < g_heaps[i]->condemned_generation_num) + gen_max = g_heaps[i]->condemned_generation_num; + if ((!should_evaluate_elevation) && (g_heaps[i]->elevation_requested)) + should_evaluate_elevation = TRUE; + if ((!should_do_blocking_collection) && (g_heaps[i]->blocking_collection)) + should_do_blocking_collection = TRUE; + } - settings.condemned_generation = gen_max; + settings.condemned_generation = gen_max; #else //MULTIPLE_HEAPS - settings.condemned_generation = generation_to_condemn (n, - &blocking_collection, - &elevation_requested, - FALSE); - should_evaluate_elevation = elevation_requested; - should_do_blocking_collection = blocking_collection; -#endif //MULTIPLE_HEAPS - - settings.condemned_generation = joined_generation_to_condemn ( - should_evaluate_elevation, - settings.condemned_generation, - &should_do_blocking_collection - STRESS_HEAP_ARG(n) - ); + settings.condemned_generation = generation_to_condemn (n, + &blocking_collection, + &elevation_requested, + FALSE); + should_evaluate_elevation = elevation_requested; + should_do_blocking_collection = blocking_collection; +#endif //MULTIPLE_HEAPS + + settings.condemned_generation = joined_generation_to_condemn ( + should_evaluate_elevation, + n, + settings.condemned_generation, + &should_do_blocking_collection + STRESS_HEAP_ARG(n) + ); - STRESS_LOG1(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10, - "condemned generation num: %d\n", settings.condemned_generation); + STRESS_LOG1(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10, + "condemned generation num: %d\n", settings.condemned_generation); - record_gcs_during_no_gc(); + record_gcs_during_no_gc(); - if (settings.condemned_generation > 1) - settings.promotion = TRUE; + if (settings.condemned_generation > 1) + settings.promotion = TRUE; #ifdef HEAP_ANALYZE - // At this point we've decided what generation is condemned - // See if we've been requested to analyze survivors after the mark phase - if (GCToEEInterface::AnalyzeSurvivorsRequested(settings.condemned_generation)) - { - heap_analyze_enabled = TRUE; - } + // At this point we've decided what generation is condemned + // See if we've been requested to analyze survivors after the mark phase + if (GCToEEInterface::AnalyzeSurvivorsRequested(settings.condemned_generation)) + { + heap_analyze_enabled = TRUE; + } #endif // HEAP_ANALYZE GCToEEInterface::DiagGCStart(settings.condemned_generation, settings.reason == reason_induced); @@ -16683,7 +16947,7 @@ int gc_heap::garbage_collect (int n) // Call the EE for start of GC work // just one thread for MP GC GCToEEInterface::GcStartWork (settings.condemned_generation, - max_generation); + max_generation); // TODO: we could fire an ETW event to say this GC as a concurrent GC but later on due to not being able to // create threads or whatever, this could be a non concurrent GC. 
Maybe for concurrent GC we should fire @@ -16699,18 +16963,7 @@ int gc_heap::garbage_collect (int n) #endif //MULTIPLE_HEAPS } - { - int gen_num_for_data = max_generation + 1; - for (int i = 0; i <= gen_num_for_data; i++) - { - gc_data_per_heap.gen_data[i].size_before = generation_size (i); - generation* gen = generation_of (i); - gc_data_per_heap.gen_data[i].free_list_space_before = generation_free_list_space (gen); - gc_data_per_heap.gen_data[i].free_obj_space_before = generation_free_obj_space (gen); - } - } - descr_generations (TRUE); -// descr_card_table(); + descr_generations (TRUE); #ifdef VERIFY_HEAP if ((GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) && @@ -16907,8 +17160,6 @@ done: if (settings.pause_mode == pause_no_gc) allocate_for_no_gc_after_gc(); - int gn = settings.condemned_generation; - return gn; } #define mark_stack_empty_p() (mark_stack_base == mark_stack_tos) @@ -17221,6 +17472,22 @@ uint8_t* gc_heap::find_object (uint8_t* o, uint8_t* low) } #endif //INTERIOR_POINTERS +#ifdef MULTIPLE_HEAPS + +#ifdef MARK_LIST +#ifdef GC_CONFIG_DRIVEN +#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;}} +#else //GC_CONFIG_DRIVEN +#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}} +#endif //GC_CONFIG_DRIVEN +#else //MARK_LIST +#define m_boundary(o) {} +#endif //MARK_LIST + +#define m_boundary_fullgc(o) {} + +#else //MULTIPLE_HEAPS + #ifdef MARK_LIST #ifdef GC_CONFIG_DRIVEN #define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;} if (slow > o) slow = o; if (shigh < o) shigh = o;} @@ -17233,6 +17500,8 @@ uint8_t* gc_heap::find_object (uint8_t* o, uint8_t* low) #define m_boundary_fullgc(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;} +#endif //MULTIPLE_HEAPS + #define method_table(o) ((CObjectHeader*)(o))->GetMethodTable() inline @@ -17543,7 +17812,9 @@ void gc_heap::save_post_plug_info (uint8_t* last_pinned_plug, uint8_t* last_obje record_interesting_data_point (idp_post_short_padded); #endif //SHORT_PLUGS m.set_post_short(); +#if defined (_DEBUG) && defined (VERIFY_HEAP) verify_pinned_queue_p = TRUE; +#endif // _DEBUG && VERIFY_HEAP #ifdef COLLECTIBLE_CLASS if (is_collectible (last_object_in_last_plug)) @@ -19489,6 +19760,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) { #endif //MULTIPLE_HEAPS + maxgen_size_inc_p = false; + num_sizedrefs = GCToEEInterface::GetTotalNumSizedRefHandles(); #ifdef MULTIPLE_HEAPS @@ -19537,8 +19810,10 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) mark_list_index = &mark_list [0]; #endif //MARK_LIST +#ifndef MULTIPLE_HEAPS shigh = (uint8_t*) 0; slow = MAX_PTR; +#endif //MULTIPLE_HEAPS //%type% category = quote (mark); @@ -19755,7 +20030,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) GCScan::GcWeakPtrScanBySingleThread (condemned_gen_number, max_generation, &sc); #ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING - if (g_fEnableARM) + if (g_fEnableAppDomainMonitoring) { size_t promoted_all_heaps = 0; #ifdef MULTIPLE_HEAPS @@ -19766,7 +20041,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) #else promoted_all_heaps = promoted_bytes (heap_number); #endif //MULTIPLE_HEAPS - SystemDomain::RecordTotalSurvivedBytes (promoted_all_heaps); + GCToEEInterface::RecordTotalSurvivedBytes(promoted_all_heaps); } #endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING 
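The AppDomain-monitoring hunk above follows a broader pattern in this change: calls that used to reach into the VM directly (SystemDomain, SyncBlockCache) now go through GCToEEInterface so a standalone GC can build without those types. The forwarder for RecordTotalSurvivedBytes is outside the hunks shown here; a minimal standalone sketch would presumably mirror the VerifySyncTableEntry shim added to gcenv.ee.standalone.inl further down (the signature below is an assumption, not part of this diff):

    // Sketch only -- assumes IGCToCLR gains a matching virtual.
    inline void GCToEEInterface::RecordTotalSurvivedBytes(size_t total_promoted_bytes)
    {
        assert(g_theGCToCLR != nullptr);
        g_theGCToCLR->RecordTotalSurvivedBytes(total_promoted_bytes);
    }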
@@ -21385,6 +21660,9 @@ void gc_heap::plan_phase (int condemned_gen_number) #ifdef GC_CONFIG_DRIVEN dprintf (3, ("total number of marked objects: %Id (%Id)", (mark_list_index - &mark_list[0]), ((mark_list_end - &mark_list[0])))); + + if (mark_list_index >= (mark_list_end + 1)) + mark_list_index = mark_list_end + 1; #else dprintf (3, ("mark_list length: %Id", (mark_list_index - &mark_list[0]))); @@ -21769,7 +22047,7 @@ void gc_heap::plan_phase (int condemned_gen_number) dprintf(4, ("+%Ix+", (size_t)xl)); assert ((size (xl) > 0)); - assert ((size (xl) <= LARGE_OBJECT_SIZE)); + assert ((size (xl) <= loh_size_threshold)); last_object_in_plug = xl; @@ -21908,7 +22186,10 @@ void gc_heap::plan_phase (int condemned_gen_number) } else { - allocate_in_condemned = TRUE; + if (generation_allocator(older_gen)->discard_if_no_fit_p()) + { + allocate_in_condemned = TRUE; + } new_address = allocate_in_condemned_generations (consing_gen, ps, active_old_gen_number, #ifdef SHORT_PLUGS @@ -21966,8 +22247,10 @@ void gc_heap::plan_phase (int condemned_gen_number) if (pinned_plug_p) { if (fire_pinned_plug_events_p) + { FIRE_EVENT(PinPlugAtGCTime, plug_start, plug_end, (merge_with_last_pin_p ? 0 : (uint8_t*)node_gap_size (plug_start))); + } if (merge_with_last_pin_p) { @@ -22015,7 +22298,7 @@ void gc_heap::plan_phase (int condemned_gen_number) set_node_relocation_distance (plug_start, (new_address - plug_start)); if (last_node && (node_relocation_distance (last_node) == (node_relocation_distance (plug_start) + - node_gap_size (plug_start)))) + (ptrdiff_t)node_gap_size (plug_start)))) { //dprintf(3,( " Lb")); dprintf (3, ("%Ix Lb", plug_start)); @@ -22253,15 +22536,16 @@ void gc_heap::plan_phase (int condemned_gen_number) if (growth > 0) { - dprintf (1, ("gen2 grew %Id (end seg alloc: %Id, gen1 c alloc: %Id", - growth, generation_end_seg_allocated (generation_of (max_generation)), - generation_condemned_allocated (generation_of (max_generation - 1)))); + dprintf (1, ("gen2 grew %Id (end seg alloc: %Id, condemned alloc: %Id", + growth, end_seg_allocated, condemned_allocated)); + + maxgen_size_inc_p = true; } else { - dprintf (1, ("gen2 shrank %Id (end seg alloc: %Id, gen1 c alloc: %Id", - (old_gen2_size - plan_gen2_size), generation_end_seg_allocated (generation_of (max_generation)), - generation_condemned_allocated (generation_of (max_generation - 1)))); + dprintf (2, ("gen2 shrank %Id (end seg alloc: %Id, gen1 c alloc: %Id", + (old_gen2_size - plan_gen2_size), generation_end_seg_allocated (generation_of (max_generation)), + generation_condemned_allocated (generation_of (max_generation - 1)))); } generation* older_gen = generation_of (settings.condemned_generation + 1); @@ -22333,10 +22617,11 @@ void gc_heap::plan_phase (int condemned_gen_number) #ifdef BIT64 if ((!settings.concurrent) && + !provisional_mode_triggered && ((condemned_gen_number < max_generation) && ((settings.gen0_reduction_count > 0) || (settings.entry_memory_load >= 95)))) { - dprintf (2, ("gen0 reduction count is %d, condemning %d, mem load %d", + dprintf (GTC_LOG, ("gen0 reduction count is %d, condemning %d, mem load %d", settings.gen0_reduction_count, condemned_gen_number, settings.entry_memory_load)); @@ -22348,7 +22633,7 @@ void gc_heap::plan_phase (int condemned_gen_number) if ((condemned_gen_number >= (max_generation - 1)) && dt_low_ephemeral_space_p (tuning_deciding_expansion)) { - dprintf (2, ("Not enough space for all ephemeral generations with compaction")); + dprintf (GTC_LOG, ("Not enough space for all ephemeral generations with 
compaction")); should_expand = TRUE; } } @@ -22427,91 +22712,99 @@ void gc_heap::plan_phase (int condemned_gen_number) } } - settings.demotion = FALSE; - int pol_max = policy_sweep; + if (maxgen_size_inc_p && provisional_mode_triggered) + { + pm_trigger_full_gc = true; + dprintf (GTC_LOG, ("in PM: maxgen size inc, doing a sweeping gen1 and trigger NGC2")); + } + else + { + settings.demotion = FALSE; + int pol_max = policy_sweep; #ifdef GC_CONFIG_DRIVEN - BOOL is_compaction_mandatory = FALSE; + BOOL is_compaction_mandatory = FALSE; #endif //GC_CONFIG_DRIVEN - int i; - for (i = 0; i < n_heaps; i++) - { - if (pol_max < g_heaps[i]->gc_policy) - pol_max = policy_compact; - // set the demotion flag is any of the heap has demotion - if (g_heaps[i]->demotion_high >= g_heaps[i]->demotion_low) + int i; + for (i = 0; i < n_heaps; i++) { - (g_heaps[i]->get_gc_data_per_heap())->set_mechanism_bit (gc_demotion_bit); - settings.demotion = TRUE; - } + if (pol_max < g_heaps[i]->gc_policy) + pol_max = policy_compact; + // set the demotion flag is any of the heap has demotion + if (g_heaps[i]->demotion_high >= g_heaps[i]->demotion_low) + { + (g_heaps[i]->get_gc_data_per_heap())->set_mechanism_bit (gc_demotion_bit); + settings.demotion = TRUE; + } #ifdef GC_CONFIG_DRIVEN - if (!is_compaction_mandatory) - { - int compact_reason = (g_heaps[i]->get_gc_data_per_heap())->get_mechanism (gc_heap_compact); - if (compact_reason >= 0) + if (!is_compaction_mandatory) { - if (gc_heap_compact_reason_mandatory_p[compact_reason]) - is_compaction_mandatory = TRUE; + int compact_reason = (g_heaps[i]->get_gc_data_per_heap())->get_mechanism (gc_heap_compact); + if (compact_reason >= 0) + { + if (gc_heap_compact_reason_mandatory_p[compact_reason]) + is_compaction_mandatory = TRUE; + } } - } #endif //GC_CONFIG_DRIVEN - } + } #ifdef GC_CONFIG_DRIVEN - if (!is_compaction_mandatory) - { - // If compaction is not mandatory we can feel free to change it to a sweeping GC. - // Note that we may want to change this to only checking every so often instead of every single GC. - if (should_do_sweeping_gc (pol_max >= policy_compact)) - { - pol_max = policy_sweep; - } - else + if (!is_compaction_mandatory) { - if (pol_max == policy_sweep) - pol_max = policy_compact; + // If compaction is not mandatory we can feel free to change it to a sweeping GC. + // Note that we may want to change this to only checking every so often instead of every single GC. 
+ if (should_do_sweeping_gc (pol_max >= policy_compact)) + { + pol_max = policy_sweep; + } + else + { + if (pol_max == policy_sweep) + pol_max = policy_compact; + } } - } #endif //GC_CONFIG_DRIVEN - for (i = 0; i < n_heaps; i++) - { - if (pol_max > g_heaps[i]->gc_policy) - g_heaps[i]->gc_policy = pol_max; - //get the segment while we are serialized - if (g_heaps[i]->gc_policy == policy_expand) + for (i = 0; i < n_heaps; i++) { - g_heaps[i]->new_heap_segment = - g_heaps[i]->soh_get_segment_to_expand(); - if (!g_heaps[i]->new_heap_segment) + if (pol_max > g_heaps[i]->gc_policy) + g_heaps[i]->gc_policy = pol_max; + //get the segment while we are serialized + if (g_heaps[i]->gc_policy == policy_expand) { - set_expand_in_full_gc (condemned_gen_number); - //we are out of memory, cancel the expansion - g_heaps[i]->gc_policy = policy_compact; + g_heaps[i]->new_heap_segment = + g_heaps[i]->soh_get_segment_to_expand(); + if (!g_heaps[i]->new_heap_segment) + { + set_expand_in_full_gc (condemned_gen_number); + //we are out of memory, cancel the expansion + g_heaps[i]->gc_policy = policy_compact; + } } } - } - - BOOL is_full_compacting_gc = FALSE; - if ((gc_policy >= policy_compact) && (condemned_gen_number == max_generation)) - { - full_gc_counts[gc_type_compacting]++; - is_full_compacting_gc = TRUE; - } + BOOL is_full_compacting_gc = FALSE; - for (i = 0; i < n_heaps; i++) - { - //copy the card and brick tables - if (g_gc_card_table!= g_heaps[i]->card_table) + if ((gc_policy >= policy_compact) && (condemned_gen_number == max_generation)) { - g_heaps[i]->copy_brick_card_table(); + full_gc_counts[gc_type_compacting]++; + is_full_compacting_gc = TRUE; } - if (is_full_compacting_gc) + for (i = 0; i < n_heaps; i++) { - g_heaps[i]->loh_alloc_since_cg = 0; + //copy the card and brick tables + if (g_gc_card_table!= g_heaps[i]->card_table) + { + g_heaps[i]->copy_brick_card_table(); + } + + if (is_full_compacting_gc) + { + g_heaps[i]->loh_alloc_since_cg = 0; + } } } @@ -22532,31 +22825,66 @@ void gc_heap::plan_phase (int condemned_gen_number) rearrange_large_heap_segments (); } - settings.demotion = ((demotion_high >= demotion_low) ? TRUE : FALSE); - if (settings.demotion) - get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit); + if (maxgen_size_inc_p && provisional_mode_triggered) + { + pm_trigger_full_gc = true; + dprintf (GTC_LOG, ("in PM: maxgen size inc, doing a sweeping gen1 and trigger NGC2")); + } + else + { + settings.demotion = ((demotion_high >= demotion_low) ? 
TRUE : FALSE); + if (settings.demotion) + get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit); #ifdef GC_CONFIG_DRIVEN - BOOL is_compaction_mandatory = FALSE; - int compact_reason = get_gc_data_per_heap()->get_mechanism (gc_heap_compact); - if (compact_reason >= 0) - is_compaction_mandatory = gc_heap_compact_reason_mandatory_p[compact_reason]; + BOOL is_compaction_mandatory = FALSE; + int compact_reason = get_gc_data_per_heap()->get_mechanism (gc_heap_compact); + if (compact_reason >= 0) + is_compaction_mandatory = gc_heap_compact_reason_mandatory_p[compact_reason]; + + if (!is_compaction_mandatory) + { + if (should_do_sweeping_gc (should_compact)) + should_compact = FALSE; + else + should_compact = TRUE; + } +#endif //GC_CONFIG_DRIVEN - if (!is_compaction_mandatory) + if (should_compact && (condemned_gen_number == max_generation)) + { + full_gc_counts[gc_type_compacting]++; + loh_alloc_since_cg = 0; + } + } +#endif //MULTIPLE_HEAPS + + if (!pm_trigger_full_gc && pm_stress_on && provisional_mode_triggered) { - if (should_do_sweeping_gc (should_compact)) - should_compact = FALSE; - else - should_compact = TRUE; + if ((settings.condemned_generation == (max_generation - 1)) && + ((settings.gc_index % 5) == 0)) + { + pm_trigger_full_gc = true; + } } -#endif //GC_CONFIG_DRIVEN - if (should_compact && (condemned_gen_number == max_generation)) + if (settings.condemned_generation == (max_generation - 1)) { - full_gc_counts[gc_type_compacting]++; - loh_alloc_since_cg = 0; + if (provisional_mode_triggered) + { + if (should_expand) + { + should_expand = FALSE; + dprintf (GTC_LOG, ("h%d in PM cannot expand", heap_number)); + } + } + + if (pm_trigger_full_gc) + { + should_compact = FALSE; + dprintf (GTC_LOG, ("h%d PM doing sweeping", heap_number)); + } } -#endif //MULTIPLE_HEAPS if (should_compact) { @@ -23370,8 +23698,13 @@ uint8_t* tree_search (uint8_t* tree, uint8_t* old_address) #ifdef FEATURE_BASICFREEZE bool gc_heap::frozen_object_p (Object* obj) { +#ifdef MULTIPLE_HEAPS + ptrdiff_t delta = 0; + heap_segment* pSegment = segment_of ((uint8_t*)obj, delta); +#else //MULTIPLE_HEAPS heap_segment* pSegment = gc_heap::find_segment ((uint8_t*)obj, FALSE); _ASSERTE(pSegment); +#endif //MULTIPLE_HEAPS return heap_segment_read_only_p(pSegment); } @@ -25579,8 +25912,10 @@ void gc_heap::background_mark_phase () c_mark_list_index = 0; +#ifndef MULTIPLE_HEAPS shigh = (uint8_t*) 0; slow = MAX_PTR; +#endif //MULTIPLE_HEAPS generation* gen = generation_of (max_generation); @@ -26605,7 +26940,7 @@ void gc_heap::background_promote_callback (Object** ppObject, ScanContext* sc, #endif //_DEBUG dprintf (3, ("Concurrent Background Promote %Ix", (size_t)o)); - if (o && (size (o) > LARGE_OBJECT_SIZE)) + if (o && (size (o) > loh_size_threshold)) { dprintf (3, ("Brc %Ix", (size_t)o)); } @@ -26990,7 +27325,7 @@ void gc_heap::bgc_thread_function() // started and decided to do a BGC and waiting for a BGC thread to restart // vm. That GC will be waiting in wait_to_proceed and we are waiting for it // to restart the VM so we deadlock. 
- //gc_heap::disable_preemptive (current_thread, TRUE); + //gc_heap::disable_preemptive (true); } FIRE_EVENT(GCTerminateConcurrentThread_V1); @@ -27077,7 +27412,6 @@ void gc_heap::copy_cards (size_t dst_card, dsttmp |= 1 << dstbit; else dsttmp &= ~(1 << dstbit); - if (!(++srcbit % 32)) { srctmp = card_table[++srcwrd]; @@ -27346,11 +27680,9 @@ BOOL gc_heap::find_card_dword (size_t& cardw, size_t cardw_end) { cardb++; } - if (cardb == end_cardb) return FALSE; - // We found a bundle, so go through its words and find a non-zero card word uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb),cardw)]; uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb+1),cardw_end)]; while ((card_word < card_word_end) && !(*card_word)) @@ -27384,7 +27716,7 @@ BOOL gc_heap::find_card_dword (size_t& cardw, size_t cardw_end) while (card_word < card_word_end) { - if (*card_word != 0) + if ((*card_word) != 0) { cardw = (card_word - &card_table [0]); return TRUE; @@ -27392,9 +27724,10 @@ BOOL gc_heap::find_card_dword (size_t& cardw, size_t cardw_end) card_word++; } - return FALSE; + } + } #endif //CARD_BUNDLE @@ -27443,8 +27776,8 @@ BOOL gc_heap::find_card(uint32_t* card_table, { ++last_card_word; } - while ((last_card_word < &card_table [card_word_end]) && !(*last_card_word)); + while ((last_card_word < &card_table [card_word_end]) && !(*last_card_word)); if (last_card_word < &card_table [card_word_end]) { card_word_value = *last_card_word; @@ -27457,6 +27790,7 @@ BOOL gc_heap::find_card(uint32_t* card_table, #endif //CARD_BUNDLE } + // Look for the lowest bit set if (card_word_value) { @@ -27699,7 +28033,8 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating) size_t n_eph = 0; size_t n_gen = 0; size_t n_card_set = 0; - uint8_t* nhigh = (relocating ? heap_segment_plan_allocated (ephemeral_heap_segment) : high); + uint8_t* nhigh = (relocating ? + heap_segment_plan_allocated (ephemeral_heap_segment) : high); BOOL foundp = FALSE; uint8_t* start_address = 0; @@ -27719,7 +28054,6 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating) { if (card_of(last_object) > card) { - // cg means cross-generational dprintf (3, ("Found %Id cg pointers", cg_pointers_found)); if (cg_pointers_found == 0) { @@ -27736,18 +28070,14 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating) if (card >= end_card) { - // Find the first card that's set (between card and card_word_end) - foundp = find_card(card_table, card, card_word_end, end_card); + foundp = find_card (card_table, card, card_word_end, end_card); if (foundp) { - // We found card(s) set. n_card_set += end_card - card; start_address = max (beg, card_address (card)); } - limit = min (end, card_address (end_card)); } - if (!foundp || (last_object >= end) || (card_address (card) >= end)) { if (foundp && (cg_pointers_found == 0)) @@ -27758,10 +28088,8 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating) n_card_set -= (card_of (end) - card); total_cards_cleared += (card_of (end) - card); } - n_eph += cg_pointers_found; cg_pointers_found = 0; - if ((seg = heap_segment_next_in_range (seg)) != 0) { #ifdef BACKGROUND_GC @@ -27781,10 +28109,10 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating) } } - // We've found a card and will now go through the objects in it. assert (card_set_p (card)); { uint8_t* o = last_object; + o = find_first_object (start_address, last_object); // Never visit an object twice. 
assert (o >= last_object); @@ -29457,7 +29785,7 @@ void gc_heap::set_static_data() dprintf (GTC_LOG, ("PM: %d - min: %Id, max: %Id, fr_l: %Id, fr_b: %d%%", settings.pause_mode, dd->min_size, dd_max_size, - dd->fragmentation_limit, (int)(dd->fragmentation_burden_limit * 100))); + sdata->fragmentation_limit, (int)(sdata->fragmentation_burden_limit * 100))); } } @@ -30088,18 +30416,6 @@ void gc_heap::decommit_ephemeral_segment_pages() current_gc_data_per_heap->extra_gen0_committed = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment); } -size_t gc_heap::new_allocation_limit (size_t size, size_t free_size, int gen_number) -{ - dynamic_data* dd = dynamic_data_of (gen_number); - ptrdiff_t new_alloc = dd_new_allocation (dd); - assert (new_alloc == (ptrdiff_t)Align (new_alloc, - get_alignment_constant (!(gen_number == (max_generation+1))))); - size_t limit = min (max (new_alloc, (ptrdiff_t)size), (ptrdiff_t)free_size); - assert (limit == Align (limit, get_alignment_constant (!(gen_number == (max_generation+1))))); - dd_new_allocation (dd) = (new_alloc - limit ); - return limit; -} - //This is meant to be called by decide_on_compacting. size_t gc_heap::generation_fragmentation (generation* gen, @@ -30232,9 +30548,25 @@ BOOL gc_heap::decide_on_compacting (int condemned_gen_number, get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_induced_compacting); } + if (settings.reason == reason_pm_full_gc) + { + assert (condemned_gen_number == max_generation); + if (heap_number == 0) + { + dprintf (GTC_LOG, ("PM doing compacting full GC after a gen1")); + } + should_compact = TRUE; + } + dprintf (2, ("Fragmentation: %d Fragmentation burden %d%%", fragmentation, (int) (100*fragmentation_burden))); + if (provisional_mode_triggered && (condemned_gen_number == (max_generation - 1))) + { + dprintf (GTC_LOG, ("gen1 in PM always compact")); + should_compact = TRUE; + } + if (!should_compact) { if (dt_low_ephemeral_space_p (tuning_deciding_compaction)) @@ -30480,6 +30812,8 @@ BOOL gc_heap::ephemeral_gen_fit_p (gc_tuning_point tp) { if (large_chunk_found) { + sufficient_gen0_space_p = TRUE; + dprintf (3, ("Enough room")); return TRUE; } @@ -30505,7 +30839,7 @@ BOOL gc_heap::ephemeral_gen_fit_p (gc_tuning_point tp) if ((tp == tuning_deciding_condemned_gen) || (tp == tuning_deciding_full_gc)) { - end_space = 2*dd_min_size (dd); + end_space = max (2*dd_min_size (dd), end_space_after_gc()); } else { @@ -30532,23 +30866,11 @@ CObjectHeader* gc_heap::allocate_large_object (size_t jsize, int64_t& alloc_byte acontext.set_alloc_heap(vm_heap); #endif //MULTIPLE_HEAPS -#ifdef MARK_ARRAY - uint8_t* current_lowest_address = lowest_address; - uint8_t* current_highest_address = highest_address; -#ifdef BACKGROUND_GC - if (recursive_gc_sync::background_running_p()) - { - current_lowest_address = background_saved_lowest_address; - current_highest_address = background_saved_highest_address; - } -#endif //BACKGROUND_GC -#endif // MARK_ARRAY - - #if BIT64 +#if BIT64 size_t maxObjectSize = (INT64_MAX - 7 - Align(min_obj_size)); - #else +#else size_t maxObjectSize = (INT32_MAX - 7 - Align(min_obj_size)); - #endif +#endif if (jsize >= maxObjectSize) { @@ -30580,6 +30902,18 @@ CObjectHeader* gc_heap::allocate_large_object (size_t jsize, int64_t& alloc_byte #pragma inline_depth(20) #endif //_MSC_VER +#ifdef MARK_ARRAY + uint8_t* current_lowest_address = lowest_address; + uint8_t* current_highest_address = highest_address; +#ifdef BACKGROUND_GC + if 
(recursive_gc_sync::background_running_p()) + { + current_lowest_address = background_saved_lowest_address; + current_highest_address = background_saved_highest_address; + } +#endif //BACKGROUND_GC +#endif // MARK_ARRAY + #ifdef FEATURE_LOH_COMPACTION // The GC allocator made a free object already in this alloc context and // adjusted the alloc_ptr accordingly. @@ -30605,7 +30939,7 @@ CObjectHeader* gc_heap::allocate_large_object (size_t jsize, int64_t& alloc_byte #ifdef BACKGROUND_GC //the object has to cover one full mark uint32_t assert (size > mark_word_size); - if (current_c_gc_state == c_gc_state_marking) + if (current_c_gc_state != c_gc_state_free) { dprintf (3, ("Concurrent allocation of a large object %Ix", (size_t)obj)); @@ -30647,7 +30981,7 @@ void reset_memory (uint8_t* o, size_t sizeo) // We don't do unlock because there could be many processes using workstation GC and it's // bad perf to have many threads doing unlock at the same time. bool unlock_p = false; -#endif // MULTIPLE_HEAPS +#endif //MULTIPLE_HEAPS reset_mm_p = GCToOSInterface::VirtualReset((void*)page_start, size, unlock_p); } @@ -30775,6 +31109,30 @@ BOOL gc_heap::background_object_marked (uint8_t* o, BOOL clearp) return m; } +void gc_heap::background_delay_delete_loh_segments() +{ + generation* gen = large_object_generation; + heap_segment* seg = heap_segment_rw (generation_start_segment (large_object_generation)); + heap_segment* prev_seg = 0; + + while (seg) + { + heap_segment* next_seg = heap_segment_next (seg); + if (seg->flags & heap_segment_flags_loh_delete) + { + dprintf (3, ("deleting %Ix-%Ix-%Ix", (size_t)seg, heap_segment_allocated (seg), heap_segment_reserved (seg))); + delete_heap_segment (seg, (GCConfig::GetRetainVM() != 0)); + heap_segment_next (prev_seg) = next_seg; + } + else + { + prev_seg = seg; + } + + seg = next_seg; + } +} + uint8_t* gc_heap::background_next_end (heap_segment* seg, BOOL large_objects_p) { return @@ -30804,12 +31162,15 @@ void gc_heap::generation_delete_heap_segment (generation* gen, dprintf (3, ("bgc sweep: deleting seg %Ix", seg)); if (gen == large_object_generation) { - heap_segment_next (prev_seg) = next_seg; - dprintf (3, ("Preparing empty large segment %Ix for deletion", (size_t)seg)); - heap_segment_next (seg) = freeable_large_heap_segment; - freeable_large_heap_segment = seg; + // We cannot thread segs in here onto freeable_large_heap_segment because + // grow_brick_card_tables could be committing mark array which needs to read + // the seg list. So we delay it till next time we suspend EE. + seg->flags |= heap_segment_flags_loh_delete; + // Since we will be decommitting the seg, we need to prevent heap verification + // to verify this segment. 
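// (The flagged segment is not freed here: background_delay_delete_loh_segments,
// added above, walks the LOH segment list on a later GC -- garbage_collect now
// calls it while the EE is suspended -- and only then unlinks and deletes it.)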
+ heap_segment_allocated (seg) = heap_segment_mem (seg); } else { @@ -30840,22 +31201,20 @@ void gc_heap::process_background_segment_end (heap_segment* seg, *delete_p = FALSE; uint8_t* allocated = heap_segment_allocated (seg); uint8_t* background_allocated = heap_segment_background_allocated (seg); + BOOL loh_p = heap_segment_loh_p (seg); dprintf (3, ("Processing end of background segment [%Ix, %Ix[(%Ix[)", (size_t)heap_segment_mem (seg), background_allocated, allocated)); - - if (allocated != background_allocated) + if (!loh_p && (allocated != background_allocated)) { - if (gen == large_object_generation) - { - FATAL_GC_ERROR(); - } + assert (gen != large_object_generation); dprintf (3, ("Make a free object before newly promoted objects [%Ix, %Ix[", (size_t)last_plug_end, background_allocated)); thread_gap (last_plug_end, background_allocated - last_plug_end, generation_of (max_generation)); + fix_brick_to_highest (last_plug_end, background_allocated); // When we allowed fgc's during going through gaps, we could have erased the brick @@ -31221,6 +31580,7 @@ void gc_heap::background_sweep() FIRE_EVENT(BGC2ndNonConEnd); + loh_alloc_thread_count = 0; current_bgc_state = bgc_sweep_soh; verify_soh_segment_list(); @@ -31269,6 +31629,8 @@ void gc_heap::background_sweep() background_ephemeral_sweep(); + concurrent_print_time_delta ("Swe eph"); + #ifdef MULTIPLE_HEAPS bgc_t_join.join(this, gc_join_after_ephemeral_sweep); if (bgc_t_join.joined()) @@ -31362,6 +31724,21 @@ void gc_heap::background_sweep() if (gen != large_object_generation) { dprintf (2, ("bgs: sweeping gen3 objects")); + concurrent_print_time_delta ("Swe SOH"); + FIRE_EVENT(BGC1stSweepEnd, 0); + + enter_spin_lock (&more_space_lock_loh); + add_saved_spinlock_info (true, me_acquire, mt_bgc_loh_sweep); + + concurrent_print_time_delta ("Swe LOH took msl"); + + // We wait till all allocating threads are completely done. + int spin_count = yp_spin_count_unit; + while (loh_alloc_thread_count) + { + spin_and_switch (spin_count, (loh_alloc_thread_count == 0)); + } + current_bgc_state = bgc_sweep_loh; gen = generation_of (max_generation+1); start_seg = heap_segment_rw (generation_start_segment (gen)); @@ -31506,6 +31883,9 @@ void gc_heap::background_sweep() reset_seg = heap_segment_next_rw (reset_seg); } + generation* loh_gen = generation_of (max_generation + 1); + generation_allocation_segment (loh_gen) = heap_segment_rw (generation_start_segment (loh_gen)); + // We calculate dynamic data here because if we wait till we signal the lh event, // the allocation thread can change the fragmentation and we may read an intermediate // value (which can be greater than the generation size). 
Plus by that time it won't @@ -31537,6 +31917,9 @@ void gc_heap::background_sweep() gc_lh_block_event.Set(); } + add_saved_spinlock_info (true, me_release, mt_bgc_loh_sweep); + leave_spin_lock (&more_space_lock_loh); + //dprintf (GTC_LOG, ("---- (GC%d)End Background Sweep Phase ----", VolatileLoad(&settings.gc_index))); dprintf (GTC_LOG, ("---- (GC%d)ESw ----", VolatileLoad(&settings.gc_index))); } @@ -33249,7 +33632,7 @@ gc_heap::verify_heap (BOOL begin_gc_p) if (current_join->joined()) #endif //MULTIPLE_HEAPS { - SyncBlockCache::GetSyncBlockCache()->VerifySyncTableEntry(); + GCToEEInterface::VerifySyncTableEntry(); #ifdef MULTIPLE_HEAPS current_join->restart(); #endif //MULTIPLE_HEAPS @@ -33442,17 +33825,58 @@ HRESULT GCHeap::Initialize () #endif //SEG_MAPPING_TABLE #ifdef MULTIPLE_HEAPS + uint32_t nhp_from_config = static_cast(GCConfig::GetHeapCount()); + + // GetGCProcessCpuCount only returns up to 64 procs. + unsigned int nhp_from_process = GCToOSInterface::CanEnableGCCPUGroups() ? + GCToOSInterface::GetTotalProcessorCount(): + GCToOSInterface::GetCurrentProcessCpuCount(); + + unsigned int nhp = ((nhp_from_config == 0) ? nhp_from_process : + (min (nhp_from_config, nhp_from_process))); + + + nhp = min (nhp, MAX_SUPPORTED_CPUS); + if (GCConfig::GetNoAffinitize()) gc_heap::gc_thread_no_affinitize_p = true; - uint32_t nhp_from_config = static_cast(GCConfig::GetHeapCount()); - - uint32_t nhp_from_process = GCToOSInterface::GetCurrentProcessCpuCount(); +#if !defined(FEATURE_REDHAWK) && !defined(FEATURE_CORECLR) + if (!(gc_heap::gc_thread_no_affinitize_p)) + { + if (!(GCToOSInterface::CanEnableGCCPUGroups())) + { + size_t gc_thread_affinity_mask = static_cast(GCConfig::GetGCHeapAffinitizeMask()); - uint32_t nhp = ((nhp_from_config == 0) ? nhp_from_process : - (min (nhp_from_config, nhp_from_process))); + uintptr_t pmask, smask; + if (GCToOSInterface::GetCurrentProcessAffinityMask(&pmask, &smask)) + { + pmask &= smask; - nhp = min (nhp, MAX_SUPPORTED_CPUS); + if (gc_thread_affinity_mask) + { + pmask &= gc_thread_affinity_mask; + } + + process_mask = pmask; + + unsigned int set_bits_in_pmask = 0; + while (pmask) + { + if (pmask & 1) + set_bits_in_pmask++; + pmask >>= 1; + } + + nhp = min (nhp, set_bits_in_pmask); + } + else + { + gc_heap::gc_thread_no_affinitize_p = true; + } + } + } +#endif //!FEATURE_REDHAWK && !FEATURE_CORECLR hr = gc_heap::initialize_gc (seg_size, large_seg_size /*LHEAP_ALLOC*/, nhp); #else @@ -33469,20 +33893,34 @@ HRESULT GCHeap::Initialize () gc_heap::mem_one_percent /= g_num_processors; #endif //!MULTIPLE_HEAPS - // We should only use this if we are in the "many process" mode which really is only applicable - // to very powerful machines - before that's implemented, temporarily I am only enabling this for 80GB+ memory. - // For now I am using an estimate to calculate these numbers but this should really be obtained - // programmatically going forward. - // I am assuming 47 processes using WKS GC and 3 using SVR GC. - // I am assuming 3 in part due to the "very high memory load" is 97%. 
- int available_mem_th = 10; - if (gc_heap::total_physical_mem >= ((uint64_t)80 * 1024 * 1024 * 1024)) + uint32_t highmem_th_from_config = (uint32_t)GCConfig::GetGCHighMemPercent(); + if (highmem_th_from_config) { - int adjusted_available_mem_th = 3 + (int)((float)47 / (float)(g_num_processors)); - available_mem_th = min (available_mem_th, adjusted_available_mem_th); + gc_heap::high_memory_load_th = min (99, highmem_th_from_config); + gc_heap::v_high_memory_load_th = min (99, (highmem_th_from_config + 7)); + } + else + { + // We should only use this if we are in the "many process" mode which really is only applicable + // to very powerful machines - before that's implemented, temporarily I am only enabling this for 80GB+ memory. + // For now I am using an estimate to calculate these numbers but this should really be obtained + // programmatically going forward. + // I am assuming 47 processes using WKS GC and 3 using SVR GC. + // I am assuming 3 in part due to the "very high memory load" is 97%. + int available_mem_th = 10; + if (gc_heap::total_physical_mem >= ((uint64_t)80 * 1024 * 1024 * 1024)) + { + int adjusted_available_mem_th = 3 + (int)((float)47 / (float)(GCToOSInterface::GetTotalProcessorCount())); + available_mem_th = min (available_mem_th, adjusted_available_mem_th); + } + + gc_heap::high_memory_load_th = 100 - available_mem_th; + gc_heap::v_high_memory_load_th = 97; } - gc_heap::high_memory_load_th = 100 - available_mem_th; + gc_heap::m_high_memory_load_th = min ((gc_heap::high_memory_load_th + 5), gc_heap::v_high_memory_load_th); + + gc_heap::pm_stress_on = (GCConfig::GetGCProvModeStress() != 0); #if defined(BIT64) gc_heap::youngest_gen_desired_th = gc_heap::mem_one_percent; @@ -33504,7 +33942,9 @@ HRESULT GCHeap::Initialize () #if defined (STRESS_HEAP) && !defined (MULTIPLE_HEAPS) if (GCStress::IsEnabled()) { for(int i = 0; i < GCHeap::NUM_HEAP_STRESS_OBJS; i++) + { m_StressObjs[i] = CreateGlobalHandle(0); + } m_CurStressObj = 0; } #endif //STRESS_HEAP && !MULTIPLE_HEAPS @@ -33595,6 +34035,19 @@ size_t GCHeap::GetPromotedBytes(int heap_index) } } +void GCHeap::SetYieldProcessorScalingFactor (float scalingFactor) +{ + assert (yp_spin_count_unit != 0); + int saved_yp_spin_count_unit = yp_spin_count_unit; + yp_spin_count_unit = (int)((float)yp_spin_count_unit * scalingFactor / (float)9); + + // It's very suspicious if it becomes 0 + if (yp_spin_count_unit == 0) + { + yp_spin_count_unit = saved_yp_spin_count_unit; + } +} + unsigned int GCHeap::WhichGeneration (Object* object) { gc_heap* hp = gc_heap::heap_of ((uint8_t*)object); @@ -33776,11 +34229,11 @@ void GCHeap::Promote(Object** ppObject, ScanContext* sc, uint32_t flags) #ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING size_t promoted_size_end = hp->promoted_bytes (thread); - if (g_fEnableARM) + if (g_fEnableAppDomainMonitoring) { if (sc->pCurrentDomain) { - sc->pCurrentDomain->RecordSurvivedBytes ((promoted_size_end - promoted_size_begin), thread); + GCToEEInterface::RecordSurvivedBytesForHeap((promoted_size_end - promoted_size_begin), thread, sc->pCurrentDomain); } } #endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING @@ -33857,7 +34310,7 @@ void GCHeap::Relocate (Object** ppObject, ScanContext* sc, // For now we simply look at the size of the object to determine if it in the // fixed heap or not. If the bit indicating this gets set at some point // we should key off that instead. 
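// (Here and in the allocation paths below, the hard-coded LARGE_OBJECT_SIZE check
// is replaced by the loh_size_threshold global. Presumably it is seeded during
// initialization from the new GCLOHThreshold config added to gcconfig.h further
// down, along the lines of
//     loh_size_threshold = max ((size_t)GCConfig::GetLOHThreshold(), (size_t)LARGE_OBJECT_SIZE);
// since that config defaults to LARGE_OBJECT_SIZE, behavior is unchanged unless
// the threshold is set explicitly.)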
- return size( pObj ) >= LARGE_OBJECT_SIZE; + return size( pObj ) >= loh_size_threshold; } #ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way @@ -34006,7 +34459,7 @@ bool GCHeap::StressHeap(gc_alloc_context * context) while(HndFetchHandle(m_StressObjs[i]) == 0) { _ASSERTE(m_StressObjs[i] != 0); - unsigned strLen = (LARGE_OBJECT_SIZE - 32) / sizeof(WCHAR); + unsigned strLen = ((unsigned)loh_size_threshold - 32) / sizeof(WCHAR); unsigned strSize = PtrAlign(StringObject::GetSize(strLen)); // update the cached type handle before allocating @@ -34016,7 +34469,6 @@ bool GCHeap::StressHeap(gc_alloc_context * context) { str->SetMethodTable (g_pStringClass); str->SetStringLength (strLen); - HndAssignHandle(m_StressObjs[i], ObjectToOBJECTREF(str)); } i = (i + 1) % NUM_HEAP_STRESS_OBJS; @@ -34159,7 +34611,7 @@ GCHeap::AllocAlign8Common(void* _hp, alloc_context* acontext, size_t size, uint3 #endif //COUNT_CYCLES #endif //TRACE_GC - if (size < LARGE_OBJECT_SIZE) + if (size < loh_size_threshold) { #ifdef TRACE_GC AllocSmallCount++; @@ -34219,7 +34671,7 @@ GCHeap::AllocAlign8Common(void* _hp, alloc_context* acontext, size_t size, uint3 // support mis-aligned object headers so we can't support biased headers as above. Luckily for us // we've managed to arrange things so the only case where we see a bias is for boxed value types and // these can never get large enough to be allocated on the LOH. - ASSERT(65536 < LARGE_OBJECT_SIZE); + ASSERT(65536 < loh_size_threshold); ASSERT((flags & GC_ALLOC_ALIGN8_BIAS) == 0); alloc_context* acontext = generation_alloc_context (hp->generation_of (max_generation+1)); @@ -34346,7 +34798,7 @@ GCHeap::Alloc(gc_alloc_context* context, size_t size, uint32_t flags REQD_ALIGN_ #endif //_PREFAST_ #endif //MULTIPLE_HEAPS - if (size < LARGE_OBJECT_SIZE) + if (size < loh_size_threshold) { #ifdef TRACE_GC @@ -34381,7 +34833,7 @@ GCHeap::Alloc(gc_alloc_context* context, size_t size, uint32_t flags REQD_ALIGN_ } void -GCHeap::FixAllocContext (gc_alloc_context* context, bool lockp, void* arg, void *heap) +GCHeap::FixAllocContext (gc_alloc_context* context, void* arg, void *heap) { alloc_context* acontext = static_cast(context); #ifdef MULTIPLE_HEAPS @@ -34403,16 +34855,8 @@ GCHeap::FixAllocContext (gc_alloc_context* context, bool lockp, void* arg, void if (heap == NULL || heap == hp) { - if (lockp) - { - enter_spin_lock (&hp->more_space_lock); - } hp->fix_allocation_context (acontext, ((arg != 0)? 
TRUE : FALSE), - get_alignment_constant(TRUE)); - if (lockp) - { - leave_spin_lock (&hp->more_space_lock); - } + get_alignment_constant(TRUE)); } } @@ -34604,12 +35048,18 @@ GCHeap::GarbageCollectTry (int generation, BOOL low_memory_p, int mode) if (low_memory_p) { if (mode & collection_blocking) + { reason = reason_lowmemory_blocking; + } else + { reason = reason_lowmemory; + } } else + { reason = reason_induced; + } if (reason == reason_induced) { @@ -34703,9 +35153,9 @@ void gc_heap::do_pre_gc() } #ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING - if (g_fEnableARM) + if (g_fEnableAppDomainMonitoring) { - SystemDomain::ResetADSurvivedBytes(); + GCToEEInterface::ResetTotalSurvivedBytes(); } #endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING } @@ -34808,6 +35258,44 @@ BOOL gc_heap::should_do_sweeping_gc (BOOL compact_p) } #endif //GC_CONFIG_DRIVEN +bool gc_heap::is_pm_ratio_exceeded() +{ + size_t maxgen_frag = 0; + size_t maxgen_size = 0; + size_t total_heap_size = get_total_heap_size(); + +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap* hp = gc_heap::g_heaps[i]; +#else //MULTIPLE_HEAPS + { + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + + maxgen_frag += dd_fragmentation (hp->dynamic_data_of (max_generation)); + maxgen_size += hp->generation_size (max_generation); + } + + double maxgen_ratio = (double)maxgen_size / (double)total_heap_size; + double maxgen_frag_ratio = (double)maxgen_frag / (double)maxgen_size; + dprintf (GTC_LOG, ("maxgen %Id(%d%% total heap), frag: %Id (%d%% maxgen)", + maxgen_size, (int)(maxgen_ratio * 100.0), + maxgen_frag, (int)(maxgen_frag_ratio * 100.0))); + + bool maxgen_highfrag_p = ((maxgen_ratio > 0.5) && (maxgen_frag_ratio > 0.1)); + + // We need to adjust elevation here because if there's enough fragmentation it's not + // unproductive. + if (maxgen_highfrag_p) + { + settings.should_lock_elevation = FALSE; + dprintf (GTC_LOG, ("high frag gen2, turn off elevation")); + } + + return maxgen_highfrag_p; +} + void gc_heap::do_post_gc() { if (!settings.concurrent) @@ -34851,6 +35339,59 @@ void gc_heap::do_post_gc() last_gc_heap_size = get_total_heap_size(); last_gc_fragmentation = get_total_fragmentation(); + // Note we only do this at the end of full blocking GCs because we do not want + // to turn on this provisional mode during the middle of a BGC. 
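// (Illustrative numbers only: with GCHighMemPercent=85, GCHeap::Initialize earlier
// in this diff sets high_memory_load_th=85, v_high_memory_load_th=min(99,85+7)=92
// and m_high_memory_load_th=min(85+5,92)=90. If a blocking gen2 GC then records
// entry_memory_load=88 with a 600MB gen2 on a 1000MB heap and 90MB of gen2
// fragmentation, is_pm_ratio_exceeded() sees maxgen_ratio=0.6>0.5 and
// maxgen_frag_ratio=0.15>0.1, so the code below turns provisional mode on.)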
+ if ((settings.condemned_generation == max_generation) && (!settings.concurrent)) + { + if (pm_stress_on) + { + size_t full_compacting_gc_count = full_gc_counts[gc_type_compacting]; + if (provisional_mode_triggered) + { + uint64_t r = gc_rand::get_rand(10); + if ((full_compacting_gc_count - provisional_triggered_gc_count) >= r) + { + provisional_mode_triggered = false; + provisional_off_gc_count = full_compacting_gc_count; + dprintf (GTC_LOG, ("%Id NGC2s when turned on, %Id NGCs since(%Id)", + provisional_triggered_gc_count, (full_compacting_gc_count - provisional_triggered_gc_count), + num_provisional_triggered)); + } + } + else + { + uint64_t r = gc_rand::get_rand(5); + if ((full_compacting_gc_count - provisional_off_gc_count) >= r) + { + provisional_mode_triggered = true; + provisional_triggered_gc_count = full_compacting_gc_count; + num_provisional_triggered++; + dprintf (GTC_LOG, ("%Id NGC2s when turned off, %Id NGCs since(%Id)", + provisional_off_gc_count, (full_compacting_gc_count - provisional_off_gc_count), + num_provisional_triggered)); + } + } + } + else + { + if (provisional_mode_triggered) + { + if ((settings.entry_memory_load < high_memory_load_th) || + !is_pm_ratio_exceeded()) + { + dprintf (GTC_LOG, ("turning off PM")); + provisional_mode_triggered = false; + } + } + else if ((settings.entry_memory_load >= high_memory_load_th) && is_pm_ratio_exceeded()) + { + dprintf (GTC_LOG, ("highmem && highfrag - turning on PM")); + provisional_mode_triggered = true; + num_provisional_triggered++; + } + } + } + GCHeap::UpdatePostGCCounters(); #ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING //if (g_fEnableARM) @@ -34938,11 +35479,11 @@ GCHeap::GarbageCollectGeneration (unsigned int gen, gc_reason reason) #endif //COUNT_CYCLES #endif //TRACE_GC - gc_heap::g_low_memory_status = (reason == reason_lowmemory) || - (reason == reason_lowmemory_blocking) || - (gc_heap::latency_level == latency_level_memory_footprint); + gc_heap::g_low_memory_status = (reason == reason_lowmemory) || + (reason == reason_lowmemory_blocking) || + (gc_heap::latency_level == latency_level_memory_footprint); - gc_trigger_reason = reason; + gc_trigger_reason = reason; #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) @@ -35011,12 +35552,16 @@ GCHeap::GarbageCollectGeneration (unsigned int gen, gc_reason reason) condemned_generation_number = GcCondemnedGeneration; #else - if (gc_heap::proceed_with_gc_p) - { - BEGIN_TIMING(gc_during_log); - pGenGCHeap->garbage_collect (condemned_generation_number); - END_TIMING(gc_during_log); - } + if (gc_heap::proceed_with_gc_p) + { + BEGIN_TIMING(gc_during_log); + pGenGCHeap->garbage_collect (condemned_generation_number); + if (gc_heap::pm_trigger_full_gc) + { + pGenGCHeap->garbage_collect_pm_full_gc(); + } + END_TIMING(gc_during_log); + } #endif //MULTIPLE_HEAPS #ifdef TRACE_GC @@ -35383,6 +35928,7 @@ void GCHeap::PublishObject (uint8_t* Obj) #ifdef BACKGROUND_GC gc_heap* hp = gc_heap::heap_of (Obj); hp->bgc_alloc_lock->loh_alloc_done (Obj); + hp->bgc_untrack_loh_alloc(); #endif //BACKGROUND_GC } @@ -36009,9 +36555,9 @@ CFinalize::GcScanRoots (promote_func* fn, int hn, ScanContext *pSC) //dprintf (3, ("scan freacheable %Ix", (size_t)o)); dprintf (3, ("scan f %Ix", (size_t)o)); #ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING - if (g_fEnableARM) + if (g_fEnableAppDomainMonitoring) { - pSC->pCurrentDomain = SystemDomain::GetAppDomainAtIndex(o->GetAppDomainIndex()); + pSC->pCurrentDomain = GCToEEInterface::GetAppDomainForObject(o); } #endif 
//FEATURE_APPDOMAIN_RESOURCE_MONITORING diff --git a/src/coreclr/src/gc/gc.h b/src/coreclr/src/gc/gc.h index 6bed31f..7d63d4b 100644 --- a/src/coreclr/src/gc/gc.h +++ b/src/coreclr/src/gc/gc.h @@ -52,6 +52,7 @@ struct fgm_history } }; +// These values should be in sync with the GC_REASONs (in eventtrace.h) used for ETW. // TODO : it would be easier to make this an ORed value enum gc_reason { @@ -67,6 +68,8 @@ enum gc_reason reason_lowmemory_blocking = 9, reason_induced_compacting = 10, reason_lowmemory_host = 11, + reason_pm_full_gc = 12, // provisional mode requested to trigger full GC + reason_lowmemory_host_blocking = 13, reason_max }; diff --git a/src/coreclr/src/gc/gcconfig.h b/src/coreclr/src/gc/gcconfig.h index ea44a09..c23d3ea 100644 --- a/src/coreclr/src/gc/gcconfig.h +++ b/src/coreclr/src/gc/gcconfig.h @@ -77,6 +77,8 @@ public: INT_CONFIG(HeapVerifyLevel, "HeapVerify", HEAPVERIFY_NONE, \ "When set verifies the integrity of the managed heap on entry and exit of each GC") \ INT_CONFIG(LOHCompactionMode, "GCLOHCompact", 0, "Specifies the LOH compaction mode") \ + INT_CONFIG(LOHThreshold, "GCLOHThreshold", LARGE_OBJECT_SIZE, \ + "Specifies the size that will make objects go on LOH") \ INT_CONFIG(BGCSpinCount, "BGCSpinCount", 140, "Specifies the bgc spin count") \ INT_CONFIG(BGCSpin, "BGCSpin", 2, "Specifies the bgc spin time") \ INT_CONFIG(HeapCount, "GCHeapCount", 0, "Specifies the number of server GC heaps") \ @@ -91,6 +93,12 @@ public: INT_CONFIG(LogFileSize, "GCLogFileSize", 0, "Specifies the GC log file size") \ INT_CONFIG(CompactRatio, "GCCompactRatio", 0, \ "Specifies the ratio compacting GCs vs sweeping") \ + INT_CONFIG(GCHeapAffinitizeMask, "GCHeapAffinitizeMask", 0, \ + "Specifies processor mask for Server GC threads") \ + INT_CONFIG(GCHighMemPercent, "GCHighMemPercent", 0, \ + "The percent for GC to consider as high memory") \ + INT_CONFIG(GCProvModeStress, "GCProvModeStress", 0, \ + "Stress the provisional modes") \ STRING_CONFIG(LogFile, "GCLogFile", "Specifies the name of the GC log file") \ STRING_CONFIG(ConfigLogFile, "GCConfigLogFile", \ "Specifies the name of the GC config log file") \ diff --git a/src/coreclr/src/gc/gcee.cpp b/src/coreclr/src/gc/gcee.cpp index ca5fe51..52f2eb3 100644 --- a/src/coreclr/src/gc/gcee.cpp +++ b/src/coreclr/src/gc/gcee.cpp @@ -615,11 +615,6 @@ void GCHeap::SetSuspensionPending(bool fSuspensionPending) } } -void GCHeap::SetYieldProcessorScalingFactor(uint32_t yieldProcessorScalingFactor) -{ - g_yieldProcessorScalingFactor = yieldProcessorScalingFactor; -} - void GCHeap::ControlEvents(GCEventKeyword keyword, GCEventLevel level) { GCEventStatus::Set(GCEventProvider_Default, keyword, level); diff --git a/src/coreclr/src/gc/gcenv.ee.standalone.inl b/src/coreclr/src/gc/gcenv.ee.standalone.inl index 52c91ed..b000dad 100644 --- a/src/coreclr/src/gc/gcenv.ee.standalone.inl +++ b/src/coreclr/src/gc/gcenv.ee.standalone.inl @@ -317,4 +317,10 @@ inline void GCToEEInterface::AnalyzeSurvivorsFinished(int condemnedGeneration) g_theGCToCLR->AnalyzeSurvivorsFinished(condemnedGeneration); } +inline void GCToEEInterface::VerifySyncTableEntry() +{ + assert(g_theGCToCLR != nullptr); + g_theGCToCLR->VerifySyncTableEntry(); +} + #endif // __GCTOENV_EE_STANDALONE_INL__ diff --git a/src/coreclr/src/gc/gcevents.h b/src/coreclr/src/gc/gcevents.h index f834c1c..f13cf06 100644 --- a/src/coreclr/src/gc/gcevents.h +++ b/src/coreclr/src/gc/gcevents.h @@ -25,7 +25,6 @@ KNOWN_EVENT(GCAllocationTick_V1, GCEventProvider_Default, GCEventLevel_Verbose, 
KNOWN_EVENT(GCAllocationTick_V3, GCEventProvider_Default, GCEventLevel_Verbose, GCEventKeyword_GC) KNOWN_EVENT(PinObjectAtGCTime, GCEventProvider_Default, GCEventLevel_Verbose, GCEventKeyword_GC) KNOWN_EVENT(GCPerHeapHistory_V3, GCEventProvider_Default, GCEventLevel_Information, GCEventKeyword_GC) -KNOWN_EVENT(PinPlugAtGCTime, GCEventProvider_Private, GCEventLevel_Verbose, GCEventKeyword_GC) KNOWN_EVENT(SetGCHandle, GCEventProvider_Default, GCEventLevel_Information, GCEventKeyword_GCHandle) KNOWN_EVENT(DestroyGCHandle, GCEventProvider_Default, GCEventLevel_Information, GCEventKeyword_GCHandle) @@ -33,6 +32,7 @@ KNOWN_EVENT(DestroyGCHandle, GCEventProvider_Default, GCEventLevel_Information, KNOWN_EVENT(BGCBegin, GCEventProvider_Private, GCEventLevel_Information, GCEventKeyword_GCPrivate) KNOWN_EVENT(BGC1stNonConEnd, GCEventProvider_Private, GCEventLevel_Information, GCEventKeyword_GCPrivate) KNOWN_EVENT(BGC1stConEnd, GCEventProvider_Private, GCEventLevel_Information, GCEventKeyword_GCPrivate) +KNOWN_EVENT(BGC1stSweepEnd, GCEventProvider_Private, GCEventLevel_Information, GCEventKeyword_GCPrivate) KNOWN_EVENT(BGC2ndNonConBegin, GCEventProvider_Private, GCEventLevel_Information, GCEventKeyword_GCPrivate) KNOWN_EVENT(BGC2ndNonConEnd, GCEventProvider_Private, GCEventLevel_Information, GCEventKeyword_GCPrivate) KNOWN_EVENT(BGC2ndConBegin, GCEventProvider_Private, GCEventLevel_Information, GCEventKeyword_GCPrivate) @@ -45,6 +45,7 @@ KNOWN_EVENT(BGCAllocWaitEnd, GCEventProvider_Private, GCEventLevel_Information, KNOWN_EVENT(GCFullNotify_V1, GCEventProvider_Private, GCEventLevel_Information, GCEventKeyword_GCPrivate) KNOWN_EVENT(PrvSetGCHandle, GCEventProvider_Private, GCEventLevel_Information, GCEventKeyword_GCHandlePrivate) KNOWN_EVENT(PrvDestroyGCHandle, GCEventProvider_Private, GCEventLevel_Information, GCEventKeyword_GCHandlePrivate) +KNOWN_EVENT(PinPlugAtGCTime, GCEventProvider_Private, GCEventLevel_Verbose, GCEventKeyword_GC) #undef KNOWN_EVENT #undef DYNAMIC_EVENT diff --git a/src/coreclr/src/gc/gcimpl.h b/src/coreclr/src/gc/gcimpl.h index fe85964..4862b8f 100644 --- a/src/coreclr/src/gc/gcimpl.h +++ b/src/coreclr/src/gc/gcimpl.h @@ -93,7 +93,7 @@ public: void SetSuspensionPending(bool fSuspensionPending); - void SetYieldProcessorScalingFactor(uint32_t yieldProcessorScalingFactor); + void SetYieldProcessorScalingFactor(float yieldProcessorScalingFactor); void SetWaitForGCEvent(); void ResetWaitForGCEvent(); @@ -108,8 +108,7 @@ public: Object* AllocLHeap (size_t size, uint32_t flags); Object* Alloc (gc_alloc_context* acontext, size_t size, uint32_t flags); - void FixAllocContext (gc_alloc_context* acontext, - bool lockp, void* arg, void *heap); + void FixAllocContext (gc_alloc_context* acontext, void* arg, void *heap); Object* GetContainingObject(void *pInteriorPtr, bool fCollectedGenOnly); diff --git a/src/coreclr/src/gc/gcinterface.ee.h b/src/coreclr/src/gc/gcinterface.ee.h index 9a38f9f..228bb37 100644 --- a/src/coreclr/src/gc/gcinterface.ee.h +++ b/src/coreclr/src/gc/gcinterface.ee.h @@ -132,6 +132,8 @@ public: virtual void FireBGC1stConEnd() = 0; virtual + void FireBGC1stSweepEnd(uint32_t genNumber) = 0; + virtual void FireBGC2ndNonConBegin() = 0; virtual void FireBGC2ndNonConEnd() = 0; @@ -430,6 +432,9 @@ public: virtual void AnalyzeSurvivorsFinished(int condemnedGeneration) = 0; + + virtual + void VerifySyncTableEntry() = 0; }; #endif // _GCINTERFACE_EE_H_ diff --git a/src/coreclr/src/gc/gcinterface.h b/src/coreclr/src/gc/gcinterface.h index 6df1e66..72e0d9d 100644 --- 
a/src/coreclr/src/gc/gcinterface.h +++ b/src/coreclr/src/gc/gcinterface.h @@ -718,7 +718,7 @@ public: // "Fixes" an allocation context by binding its allocation pointer to a // location on the heap. - virtual void FixAllocContext(gc_alloc_context* acontext, bool lockp, void* arg, void* heap) = 0; + virtual void FixAllocContext(gc_alloc_context* acontext, void* arg, void* heap) = 0; // Gets the total survived size plus the total allocated bytes on the heap. virtual size_t GetCurrentObjSize() = 0; @@ -733,7 +733,7 @@ public: virtual void SetSuspensionPending(bool fSuspensionPending) = 0; // Tells the GC how many YieldProcessor calls are equal to one scaled yield processor call. - virtual void SetYieldProcessorScalingFactor(uint32_t yieldProcessorScalingFactor) = 0; + virtual void SetYieldProcessorScalingFactor(float yieldProcessorScalingFactor) = 0; /* ============================================================================ diff --git a/src/coreclr/src/gc/gcpriv.h b/src/coreclr/src/gc/gcpriv.h index c2f7356..ded7a6b 100644 --- a/src/coreclr/src/gc/gcpriv.h +++ b/src/coreclr/src/gc/gcpriv.h @@ -1,6 +1,10 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// // optimize for speed @@ -121,6 +125,10 @@ inline void FATAL_GC_ERROR() #define FFIND_OBJECT //faster find_object, slower allocation #define FFIND_DECAY 7 //Number of GC for which fast find will be active +#ifndef MAX_LONGPATH +#define MAX_LONGPATH 1024 +#endif // MAX_LONGPATH + //#define DEBUG_WRITE_WATCH //Additional debug for write watch //#define STRESS_PINNING //Stress pinning by pinning randomly @@ -194,7 +202,6 @@ void GCLogConfig (const char *fmt, ... ); #define MIN_NUM_FREE_SPACES 5 //Please leave these definitions intact. - // hosted api #ifdef memcpy #undef memcpy @@ -259,8 +266,7 @@ void GCLog (const char *fmt, ... ); //#define dprintf(l,x) {if ((l==GTC_LOG) || (l <= 1)) {GCLog x;}} //#define dprintf(l,x) {if (trace_gc && ((l <= print_level) || (l==GTC_LOG))) {GCLog x;}} //#define dprintf(l,x) {if (l==GTC_LOG) {printf ("\n");printf x ; fflush(stdout);}} -#else - +#else //SIMPLE_DPRINTF // Nobody used the logging mechanism that used to be here. If we find ourselves // wanting to inspect GC logs on unmodified builds, we can use this define here // to do so. @@ -455,8 +461,6 @@ enum gc_type gc_type_max = 3 }; -#define v_high_memory_load_th 97 - //encapsulates the mechanism for the current gc class gc_mechanisms { @@ -618,7 +622,6 @@ extern GCStatistics g_LastGCStatistics; #endif // GC_STATS - typedef DPTR(class heap_segment) PTR_heap_segment; typedef DPTR(class gc_heap) PTR_gc_heap; typedef DPTR(PTR_gc_heap) PTR_PTR_gc_heap; @@ -884,6 +887,9 @@ struct etw_opt_info int gen_number; }; +// Note, I am not removing the ones that are no longer used +// because the older versions of the runtime still use them +// and ETW interprets them. enum alloc_wait_reason { // When we don't care about firing an event for @@ -918,10 +924,12 @@ enum alloc_wait_reason // waiting for BGC to let FGC happen awr_fgc_wait_for_bgc = 8, - // wait for bgc to finish to get loh seg. 
+ // wait for bgc to finish to get loh seg. + // no longer used with the introduction of loh msl. awr_get_loh_seg = 9, // we don't allow loh allocation during bgc planning. + // no longer used with the introduction of loh msl. awr_loh_alloc_during_plan = 10, // we don't allow too much loh allocation during bgc. @@ -935,8 +943,8 @@ struct alloc_thread_wait_data enum msl_take_state { - mt_get_large_seg, - mt_wait_bgc_plan, + mt_get_large_seg = 0, + mt_bgc_loh_sweep, mt_wait_bgc, mt_block_gc, mt_clr_mem, @@ -962,9 +970,10 @@ struct spinlock_info msl_enter_state enter_state; msl_take_state take_state; EEThreadId thread_id; + bool loh_p; }; -const unsigned HS_CACHE_LINE_SIZE = 128; +#define HS_CACHE_LINE_SIZE 128 #ifdef SNOOP_STATS struct snoop_stats_data @@ -1231,7 +1240,19 @@ public: // returning FALSE means we actually didn't do a GC. This happens // when we figured that we needed to do a BGC. PER_HEAP - int garbage_collect (int n); + void garbage_collect (int n); + + // Since we don't want to waste a join just to do this, I am doing + // doing this at the last join in gc1. + PER_HEAP_ISOLATED + void pm_full_gc_init_or_clear(); + + // This does a GC when pm_trigger_full_gc is set + PER_HEAP + void garbage_collect_pm_full_gc(); + + PER_HEAP_ISOLATED + bool is_pm_ratio_exceeded(); PER_HEAP void init_records(); @@ -1309,8 +1330,11 @@ protected: BOOL check_only_p); PER_HEAP_ISOLATED - int joined_generation_to_condemn (BOOL should_evaluate_elevation, int n_initial, BOOL* blocking_collection - STRESS_HEAP_ARG(int n_original)); + int joined_generation_to_condemn (BOOL should_evaluate_elevation, + int initial_gen, + int current_gen, + BOOL* blocking_collection + STRESS_HEAP_ARG(int n_original)); PER_HEAP size_t min_reclaim_fragmentation_threshold (uint32_t num_heaps); @@ -1429,10 +1453,10 @@ protected: #ifdef BACKGROUND_GC PER_HEAP - void wait_for_background (alloc_wait_reason awr); + void wait_for_background (alloc_wait_reason awr, bool loh_p); PER_HEAP - void wait_for_bgc_high_memory (alloc_wait_reason awr); + void wait_for_bgc_high_memory (alloc_wait_reason awr, bool loh_p); PER_HEAP void bgc_loh_alloc_clr (uint8_t* alloc_start, @@ -1446,7 +1470,10 @@ protected: #ifdef BACKGROUND_GC PER_HEAP - void wait_for_background_planning (alloc_wait_reason awr); + void bgc_track_loh_alloc(); + + PER_HEAP + void bgc_untrack_loh_alloc(); PER_HEAP BOOL bgc_loh_should_allocate(); @@ -1464,10 +1491,16 @@ protected: PER_HEAP void add_saved_spinlock_info ( + bool loh_p, msl_enter_state enter_state, msl_take_state take_state); PER_HEAP + void trigger_gc_for_alloc (int gen_number, gc_reason reason, + GCSpinLock* spin_lock, bool loh_p, + msl_take_state take_state); + + PER_HEAP BOOL a_fit_free_list_large_p (size_t size, alloc_context* acontext, int align_const); @@ -1501,11 +1534,13 @@ protected: PER_HEAP BOOL check_and_wait_for_bgc (alloc_wait_reason awr, - BOOL* did_full_compact_gc); + BOOL* did_full_compact_gc, + bool loh_p); PER_HEAP BOOL trigger_full_compact_gc (gc_reason gr, - oom_reason* oom_r); + oom_reason* oom_r, + bool loh_p); PER_HEAP BOOL trigger_ephemeral_gc (gc_reason gr); @@ -1645,7 +1680,7 @@ protected: PER_HEAP void reset_write_watch (BOOL concurrent_p); PER_HEAP - void adjust_ephemeral_limits (); + void adjust_ephemeral_limits(); PER_HEAP void make_generation (generation& gen, heap_segment* seg, uint8_t* start, uint8_t* pointer); @@ -1969,6 +2004,11 @@ protected: void init_background_gc(); PER_HEAP uint8_t* background_next_end (heap_segment*, BOOL); + // while we are in LOH sweep we can't modify 
the segment list + // there so we mark them as to be deleted and deleted them + // at the next chance we get. + PER_HEAP + void background_delay_delete_loh_segments(); PER_HEAP void generation_delete_heap_segment (generation*, heap_segment*, heap_segment*, heap_segment*); @@ -2539,8 +2579,6 @@ protected: PER_HEAP void descr_segment (heap_segment* seg); PER_HEAP - void descr_card_table (); - PER_HEAP void descr_generations (BOOL begin_gc_p); PER_HEAP_ISOLATED @@ -2727,7 +2765,6 @@ protected: void do_background_gc(); static void bgc_thread_stub (void* arg); - #endif //BACKGROUND_GC public: @@ -2985,6 +3022,12 @@ public: uint32_t high_memory_load_th; PER_HEAP_ISOLATED + uint32_t m_high_memory_load_th; + + PER_HEAP_ISOLATED + uint32_t v_high_memory_load_th; + + PER_HEAP_ISOLATED uint64_t mem_one_percent; PER_HEAP_ISOLATED @@ -3062,8 +3105,10 @@ protected: PER_HEAP mark* mark_stack_array; +#if defined (_DEBUG) && defined (VERIFY_HEAP) PER_HEAP - BOOL verify_pinned_queue_p; + BOOL verify_pinned_queue_p; +#endif // _DEBUG && VERIFY_HEAP PER_HEAP uint8_t* oldest_pinned_plug; @@ -3246,6 +3291,9 @@ protected: size_t background_loh_alloc_count; PER_HEAP + VOLATILE(int32_t) loh_alloc_thread_count; + + PER_HEAP uint8_t** background_mark_stack_tos; PER_HEAP @@ -3338,11 +3386,13 @@ protected: PER_HEAP uint8_t* max_overflow_address; +#ifndef MULTIPLE_HEAPS PER_HEAP uint8_t* shigh; //keeps track of the highest marked object PER_HEAP uint8_t* slow; //keeps track of the lowest marked object +#endif //MULTIPLE_HEAPS PER_HEAP size_t allocation_quantum; @@ -3373,15 +3423,18 @@ protected: // The more_space_lock and gc_lock is used for 3 purposes: // - // 1) to coordinate threads that exceed their quantum (UP & MP) (more_space_lock) - // 2) to synchronize allocations of large objects (more_space_lock) + // 1) to coordinate threads that exceed their quantum (UP & MP) (more_space_lock_soh) + // 2) to synchronize allocations of large objects (more_space_lock_loh) // 3) to synchronize the GC itself (gc_lock) // PER_HEAP_ISOLATED GCSpinLock gc_lock; //lock while doing GC PER_HEAP - GCSpinLock more_space_lock; //lock while allocating more space + GCSpinLock more_space_lock_soh; //lock while allocating more space for soh + + PER_HEAP + GCSpinLock more_space_lock_loh; #ifdef SYNCHRONIZATION_STATS @@ -3492,6 +3545,9 @@ protected: #endif //FFIND_OBJECT PER_HEAP_ISOLATED + bool maxgen_size_inc_p; + + PER_HEAP_ISOLATED size_t full_gc_counts[gc_type_max]; // the # of bytes allocates since the last full compacting GC. @@ -3509,6 +3565,59 @@ protected: PER_HEAP_ISOLATED BOOL should_expand_in_full_gc; + // When we decide if we should expand the heap or not, we are + // fine NOT to expand if we find enough free space in gen0's free + // list or end of seg and we check this in decide_on_compacting. + // This is an expensive check so we just record the fact and not + // need to check in the allocator again. + PER_HEAP + BOOL sufficient_gen0_space_p; + +#ifdef MULTIPLE_HEAPS + PER_HEAP + bool gen0_allocated_after_gc_p; +#endif //MULTIPLE_HEAPS + + // A provisional mode means we could change our mind in the middle of a GC + // and want to do a different GC instead. + // + // Right now there's only one such case which is in the middle of a gen1 + // GC we want to do a blocking gen2 instead. If/When we have more we should + // have an enum that tells us which case in this provisional mode + // we are in. 
+ // + // When this mode is triggered, our current (only) condition says + // we have high fragmentation in gen2 even after we do a compacting + // full GC which is an indication of heavy pinning in gen2. In this + // case we never do BGCs, we just do either gen0 or gen1's till a + // gen1 needs to increase the gen2 size, in which case we finish up + // the current gen1 as a sweeping GC and immediately do a compacting + // full GC instead (without restarting EE). + PER_HEAP_ISOLATED + bool provisional_mode_triggered; + + PER_HEAP_ISOLATED + bool pm_trigger_full_gc; + + // For testing only BEG + // pm_stress_on currently means (since we just have one mode) we + // randomly turn the mode on; and after a random # of NGC2s we + // turn it off. + // NOTE that this means concurrent will be disabled so we can + // simulate what this mode is supposed to be used. + PER_HEAP_ISOLATED + bool pm_stress_on; + + PER_HEAP_ISOLATED + size_t provisional_triggered_gc_count; + + PER_HEAP_ISOLATED + size_t provisional_off_gc_count; + // For testing only END + + PER_HEAP_ISOLATED + size_t num_provisional_triggered; + #ifdef BACKGROUND_GC PER_HEAP_ISOLATED size_t ephemeral_fgc_counts[max_generation]; @@ -3544,7 +3653,6 @@ protected: PER_HEAP size_t interesting_data_per_gc[max_idp_count]; - #endif //GC_CONFIG_DRIVEN PER_HEAP @@ -3680,7 +3788,6 @@ public: protected: PER_HEAP void update_collection_counts (); - }; // class gc_heap #define ASSERT_OFFSETS_MATCH(field) \ @@ -3777,15 +3884,12 @@ public: bool FinalizeAppDomain (void *pDomain, bool fRunFinalizers); void CheckFinalizerObjects(); - }; class CFinalizeStaticAsserts { static_assert(dac_finalize_queue::ExtraSegCount == CFinalize::ExtraSegCount, "ExtraSegCount mismatch"); static_assert(offsetof(dac_finalize_queue, m_FillPointers) == offsetof(CFinalize, m_FillPointers), "CFinalize layout mismatch"); }; - - #endif // FEATURE_PREMORTEM_FINALIZATION inline @@ -3907,7 +4011,6 @@ size_t& dd_fragmentation (dynamic_data* inst) { return inst->fragmentation; } - inline size_t& dd_gc_clock (dynamic_data* inst) { @@ -4163,6 +4266,7 @@ struct loh_padding_obj #define heap_segment_flags_ma_committed 64 // for segments whose mark array is only partially committed. 
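
Tying the provisional mode comments in gcpriv.h back to the driver change earlier in this diff: when a gen1 GC running in provisional mode decides gen2 would have to grow, it finishes as a sweeping gen1 and sets pm_trigger_full_gc, and GCHeap::GarbageCollectGeneration then calls garbage_collect_pm_full_gc right away, before the EE is restarted. The sketch below condenses that flow; GcHeapLike and its members are stand-ins for the real gc_heap internals, kept self-contained so the shape of the control flow is visible.

// Stand-in for gc_heap; the real flow lives in GCHeap::GarbageCollectGeneration
// and gc_heap::garbage_collect / garbage_collect_pm_full_gc.
struct GcHeapLike
{
    static bool pm_trigger_full_gc;   // set by a provisional-mode gen1 GC
    void garbage_collect (int condemned_gen) { /* sweeping gen1 may set pm_trigger_full_gc */ }
    void garbage_collect_pm_full_gc () { /* blocking compacting full GC */ }
};
bool GcHeapLike::pm_trigger_full_gc = false;

void collect_with_provisional_escalation (GcHeapLike* heap, int condemned_gen)
{
    heap->garbage_collect (condemned_gen);
    if (GcHeapLike::pm_trigger_full_gc)
    {
        // Follow up with the compacting full GC immediately, without
        // restarting the EE between the two collections.
        heap->garbage_collect_pm_full_gc();
    }
}
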
#define heap_segment_flags_ma_pcommitted 128 +#define heap_segment_flags_loh_delete 256 #endif //BACKGROUND_GC //need to be careful to keep enough pad items to fit a relocation node @@ -4326,14 +4430,12 @@ dynamic_data* gc_heap::dynamic_data_of (int gen_number) #define card_size ((size_t)(GC_PAGE_SIZE/card_word_width)) #endif // BIT64 -// Returns the index of the card word a card is in inline size_t card_word (size_t card) { return card / card_word_width; } -// Returns the index of a card within its card word inline unsigned card_bit (size_t card) { diff --git a/src/coreclr/src/inc/clrconfigvalues.h b/src/coreclr/src/inc/clrconfigvalues.h index 5a12b57..20e24dc 100644 --- a/src/coreclr/src/inc/clrconfigvalues.h +++ b/src/coreclr/src/inc/clrconfigvalues.h @@ -306,6 +306,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_GCCompactRatio, W("GCCompactRatio"), 0, "Sp RETAIL_CONFIG_DWORD_INFO_DIRECT_ACCESS(EXTERNAL_GCPollType, W("GCPollType"), "") RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_GCRetainVM, W("GCRetainVM"), 0, "When set we put the segments that should be deleted on a standby list (instead of releasing them back to the OS) which will be considered to satisfy new segment requests (note that the same thing can be specified via API which is the supported way)") RETAIL_CONFIG_DWORD_INFO_DIRECT_ACCESS(UNSUPPORTED_GCSegmentSize, W("GCSegmentSize"), "Specifies the managed heap segment size") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_GCLOHThreshold, W("GCLOHThreshold"), 0, "Specifies the size that will make objects go on LOH") RETAIL_CONFIG_DWORD_INFO_DIRECT_ACCESS(UNSUPPORTED_GCLOHCompact, W("GCLOHCompact"), "Specifies the LOH compaction mode") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_gcAllowVeryLargeObjects, W("gcAllowVeryLargeObjects"), 1, "Allow allocation of 2GB+ objects on GC heap") RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_GCStress, W("GCStress"), 0, "Trigger GCs at regular intervals", CLRConfig::REGUTIL_default) @@ -319,8 +320,12 @@ RETAIL_CONFIG_DWORD_INFO_DIRECT_ACCESS(UNSUPPORTED_HeapVerify, W("HeapVerify"), RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_SetupGcCoverage, W("SetupGcCoverage"), "This doesn't appear to be a config flag", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_GCNumaAware, W("GCNumaAware"), 1, "Specifies if to enable GC NUMA aware") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_GCCpuGroup, W("GCCpuGroup"), 0, "Specifies if to enable GC to support CPU groups") -RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_GCHeapCount, W("GCHeapCount"), 0, "") -RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_GCNoAffinitize, W("GCNoAffinitize"), 0, "") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_GCHeapCount, W("GCHeapCount"), 0, "") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_GCNoAffinitize, W("GCNoAffinitize"), 0, "") +// this config is only in effect if the process is not running in multiple CPU groups. 
+RETAIL_CONFIG_DWORD_INFO_DIRECT_ACCESS(EXTERNAL_GCHeapAffinitizeMask, W("GCHeapAffinitizeMask"), "Specifies processor mask for Server GC threads") +RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_GCProvModeStress, W("GCProvModeStress"), 0, "Stress the provisional modes") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_GCHighMemPercent, W("GCHighMemPercent"), 0, "Specifies the percent for GC to consider as high memory") RETAIL_CONFIG_STRING_INFO(EXTERNAL_GCName, W("GCName"), "") /// diff --git a/src/coreclr/src/inc/configuration.h b/src/coreclr/src/inc/configuration.h index d8f8dff..876abf2 100644 --- a/src/coreclr/src/inc/configuration.h +++ b/src/coreclr/src/inc/configuration.h @@ -31,6 +31,12 @@ public: // - The default value passed in static DWORD GetKnobDWORDValue(LPCWSTR name, DWORD defaultValue); + // Unfortunately our traditional config system insists on interpreting numbers as 32-bit so intepret the config + // in the traditional way separately if you need to. + // + // Returns value for name if found in config. + static ULONGLONG GetKnobULONGLONGValue(LPCWSTR name); + // Returns (in priority order): // - The value of the ConfigStringInfo if it's set // - The value of the ConfigurationKnob (searched by name) if it's set diff --git a/src/coreclr/src/utilcode/configuration.cpp b/src/coreclr/src/utilcode/configuration.cpp index 817bc06..936dcb8 100644 --- a/src/coreclr/src/utilcode/configuration.cpp +++ b/src/coreclr/src/utilcode/configuration.cpp @@ -79,6 +79,17 @@ DWORD Configuration::GetKnobDWORDValue(LPCWSTR name, DWORD defaultValue) return defaultValue; } +ULONGLONG Configuration::GetKnobULONGLONGValue(LPCWSTR name) +{ + LPCWSTR knobValue = GetConfigurationValue(name); + if (knobValue != nullptr) + { + return _wcstoui64(knobValue, nullptr, 0); + } + + return 0; +} + LPCWSTR Configuration::GetKnobStringValue(LPCWSTR name, const CLRConfig::ConfigStringInfo& stringInfo) { LPCWSTR value = CLRConfig::GetConfigValue(stringInfo); diff --git a/src/coreclr/src/vm/ClrEtwAll.man b/src/coreclr/src/vm/ClrEtwAll.man index 1081477..230da5c 100644 --- a/src/coreclr/src/vm/ClrEtwAll.man +++ b/src/coreclr/src/vm/ClrEtwAll.man @@ -4276,6 +4276,7 @@ + @@ -4829,6 +4830,18 @@ + +
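
Earlier in this diff, Configuration::GetKnobULONGLONGValue is added because the DWORD-based knob path interprets values as 32-bit, which is not enough for an affinity mask on machines with more than 32 processors. A hypothetical call site is sketched below; the knob name string is illustrative and not taken from this change.

// GetKnobULONGLONGValue returns 0 when the name is not present in the runtime
// config, so 0 doubles as "not set" here.
ULONGLONG affinityMask = Configuration::GetKnobULONGLONGValue(W("System.GC.HeapAffinitizeMask"));
if (affinityMask != 0)
{
    // Use all 64 bits; the GetKnobDWORDValue path would truncate anything
    // above bit 31.
}
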