From f1c0c7d95164abbbc4dfbd5bc312ef543256b0e0 Mon Sep 17 00:00:00 2001 From: dotnet-bot <dotnet-bot@microsoft.com> Date: Tue, 7 Apr 2015 17:23:32 -0700 Subject: [PATCH] Merge changes from parent branch [tfs-changeset: 1448103] --- src/ToolBox/SOS/Strike/DisasmARM.cpp | 1 + src/ToolBox/SOS/Strike/DisasmARM64.cpp | 1 + src/ToolBox/SOS/Strike/disasm.cpp | 4 + src/debug/di/process.cpp | 40 +- src/dirs.proj | 13 +- src/dlls/mscoree/mscoree.settings.targets | 21 +- src/gc/gc.cpp | 456 ++++++++++-------- src/gc/gcimpl.h | 1 + src/gc/gcpriv.h | 15 +- src/gc/gcrecord.h | 105 ++-- src/gc/objecthandle.cpp | 8 +- src/inc/corprof.idl | 1 + src/inc/eventtrace.h | 22 +- src/inc/eventtracebase.h | 14 +- src/inc/fxretarget.h | 1 + src/inc/stdmacros.h | 10 +- src/jit/assertionprop.cpp | 4 +- src/jit/codegencommon.cpp | 24 +- src/jit/codegenxarch.cpp | 30 +- src/jit/compiler.h | 18 +- src/jit/flowgraph.cpp | 23 +- src/jit/gentree.cpp | 271 ++--------- src/jit/gentree.h | 6 +- src/jit/lsra.cpp | 89 +++- src/jit/lsra.h | 6 +- src/jit/morph.cpp | 210 +++++--- src/jit/optimizer.cpp | 97 +++- src/jit/rangecheck.cpp | 24 +- src/jit/simdcodegenxarch.cpp | 37 +- .../Eventing/FrameworkEventSource.cs | 31 +- src/mscorlib/src/System/Environment.cs | 7 +- .../src/System/Globalization/CultureInfo.cs | 50 +- .../System/Globalization/DateTimeFormat.cs | 1 + .../src/System/Globalization/DateTimeParse.cs | 11 +- .../src/System/Resources/ResourceManager.cs | 8 +- .../Runtime/CompilerServices/TaskAwaiter.cs | 2 +- .../Security/Permissions/FileIOPermission.cs | 1 - src/mscorlib/src/System/String.cs | 2 + .../src/System/Threading/ExecutionContext.cs | 2 + .../src/System/Threading/Tasks/Task.cs | 70 +-- src/tools/crossgen/crossgen.nativeproj | 10 +- src/vm/ClrEtwAll.man | 151 +++++- src/vm/ClrEtwAllMeta.lst | 7 + src/vm/ceemain.cpp | 8 +- src/vm/codeman.cpp | 4 - src/vm/eventtrace.cpp | 241 ++++++--- src/vm/gcinfodecoder.cpp | 28 +- src/vm/jitinterface.cpp | 1 - src/vm/rejit.cpp | 24 +- src/zap/zapimage.cpp | 74 +++ src/zap/zapper.cpp | 4 - 51 files changed, 1453 insertions(+), 836 deletions(-) diff --git a/src/ToolBox/SOS/Strike/DisasmARM.cpp b/src/ToolBox/SOS/Strike/DisasmARM.cpp index 37f89c4002..0f86e7f071 100644 --- a/src/ToolBox/SOS/Strike/DisasmARM.cpp +++ b/src/ToolBox/SOS/Strike/DisasmARM.cpp @@ -34,6 +34,7 @@ namespace ARMGCDump #define LF_GCROOTS #define LL_INFO1000 #define LOG(x) +#define LOG_PIPTR(pObjRef, gcFlags, hCallBack) #define DAC_ARG(x) #include "gcdumpnonx86.cpp" } diff --git a/src/ToolBox/SOS/Strike/DisasmARM64.cpp b/src/ToolBox/SOS/Strike/DisasmARM64.cpp index 8c9f704c31..4bfae58ee1 100644 --- a/src/ToolBox/SOS/Strike/DisasmARM64.cpp +++ b/src/ToolBox/SOS/Strike/DisasmARM64.cpp @@ -37,6 +37,7 @@ namespace ARM64GCDump #define LF_GCROOTS #define LL_INFO1000 #define LOG(x) +#define LOG_PIPTR(pObjRef, gcFlags, hCallBack) #define DAC_ARG(x) #include "gcdumpnonx86.cpp" } diff --git a/src/ToolBox/SOS/Strike/disasm.cpp b/src/ToolBox/SOS/Strike/disasm.cpp index 93a3709b9a..0d27c63bf0 100644 --- a/src/ToolBox/SOS/Strike/disasm.cpp +++ b/src/ToolBox/SOS/Strike/disasm.cpp @@ -56,6 +56,10 @@ namespace X86GCDump #undef LOG #endif #define LOG(x) ((void)0) + #ifdef LOG_PIPTR + #undef LOG_PIPTR + #endif + #define LOG_PIPTR(pObjRef, gcFlags, hCallBack) ((void)0) #include "gcdumpnonx86.cpp" #endif // FEATURE_PAL #endif // SOS_TARGET_AMD64 diff --git a/src/debug/di/process.cpp b/src/debug/di/process.cpp index 32da7dc2e6..167692be72 100644 --- a/src/debug/di/process.cpp +++ b/src/debug/di/process.cpp @@ -1045,8 +1045,7 @@ 
CordbProcess::~CordbProcess() // We shouldn't still be in Cordb's list of processes. Unfortunately, our root Cordb object // may have already been deleted b/c we're at the mercy of ref-counting, so we can't check. - if (m_sharedAppDomain) - delete m_sharedAppDomain; + _ASSERTE(m_sharedAppDomain == NULL); m_processMutex.Destroy(); m_StopGoLock.Destroy(); @@ -1278,11 +1277,17 @@ void CordbProcess::NeuterChildren() m_ContinueNeuterList.NeuterAndClear(this); m_userThreads.NeuterAndClear(GetProcessLock()); - + m_pDefaultAppDomain = NULL; // Frees per-appdomain left-side resources. See assumptions above. m_appDomains.NeuterAndClear(GetProcessLock()); + if (m_sharedAppDomain != NULL) + { + m_sharedAppDomain->Neuter(); + m_sharedAppDomain->InternalRelease(); + m_sharedAppDomain = NULL; + } m_steppers.NeuterAndClear(GetProcessLock()); @@ -8751,6 +8756,7 @@ CordbAppDomain * CordbProcess::GetSharedAppDomain() { delete pAD; } + m_sharedAppDomain->InternalAddRef(); } return m_sharedAppDomain; @@ -13093,9 +13099,31 @@ void CordbProcess::HandleDebugEventForInteropDebugging(const DEBUG_EVENT * pEven fcd.action = HIJACK_ACTION_EXIT_UNHANDLED; } - // if the user changed the context during this hijack or if it had the SingleStep flag set on it, - // then update the LS context - if (pUnmanagedThread->IsContextSet() || IsSSFlagEnabled(&tempContext)) + // + // LS context is restored here so that execution continues from next instruction that caused the hijack. + // We shouldn't always restore the LS context though. + // Consider the following case where this can cause issues: + // Debuggee process hits an exception and calls KERNELBASE!RaiseException, debugger gets the notification and + // prepares for first-chance hijack. Debugger(DBI) saves the current thread context (see SetupFirstChanceHijackForSync) which is restored + // later below (see SafeWriteThreadContext call) when the process is in VEH (CLRVectoredExceptionHandlerShim->FirstChanceSuspendHijackWorker). + // The thread context that got saved(by SetupFirstChanceHijackForSync) was for when the thread was executing RaiseException and when + // this context gets restored in VEH, the thread resumes after the exception handler with a context that is not same as one with which + // it entered. This inconsistency can lead to bad execution code-paths or even a debuggee crash. + // + // Example case where we should definitely update the LS context: + // After a DbgBreakPoint call, IP gets updated to point to the instruction after int 3 and this is the context saved by debugger. + // The IP in context passed to VEH still points to int 3 though and if we don't update the LS context in VEH, the breakpoint + // instruction will get executed again. + // + // Here's a list of cases when we update the LS context: + // * we know that context was explicitly updated during this hijack, OR + // * if single-stepping flag was set on it originally, OR + // * if this was a breakpoint event + // Note that above list is a heuristic and it is possible that we need to add more such cases in future. 
+ // + BOOL isBreakPointEvent = (pUnmanagedEvent->m_currentDebugEvent.dwDebugEventCode == EXCEPTION_DEBUG_EVENT && + pUnmanagedEvent->m_currentDebugEvent.u.Exception.ExceptionRecord.ExceptionCode == STATUS_BREAKPOINT); + if (pUnmanagedThread->IsContextSet() || IsSSFlagEnabled(&tempContext) || isBreakPointEvent) { _ASSERTE(fcd.pLeftSideContext != NULL); LOG((LF_CORDB, LL_INFO10000, "W32ET::W32EL: updating LS context at 0x%p\n", fcd.pLeftSideContext)); diff --git a/src/dirs.proj b/src/dirs.proj index 3599b7912b..ded6fb4267 100644 --- a/src/dirs.proj +++ b/src/dirs.proj @@ -41,13 +41,14 @@ <ProjectFile Include="delayimp\delayimp.nativeproj" Condition="'$(BuildProjectName)' != 'CoreSys'"/> <ProjectFile Include="dlls\dirs.proj" /> <ProjectFile Include="unwinder\dirs.proj" Condition="'$(TargetArch)' != 'i386'" /> + + <!-- In the CodeGen branch, we use RyuJIT for all JIT builds --> + <ProjectFile Include="jit\dirs.proj" Condition="'$(_BuildBranch)' == 'CodeGen'" /> - <!-- In FXMain we build RyuJIT only for amd64 and arm64 --> - <ProjectFile Include="jit32\dirs.proj" Condition="'$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64'"/> - <ProjectFile Include="jit\dirs.proj" Condition="'$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm64'"/> - <!-- In Codegen we always build RyuJIT this includes x86 as well as amd64 and arm64 ==> - <ProjectFile Include="jit\dirs.proj" /> - --> + <!-- In other branches, we build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 --> + <ProjectFile Include="jit\dirs.proj" Condition="('$(_BuildBranch)' != 'CodeGen') and ('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm64')" /> + <ProjectFile Include="jit32\dirs.proj" Condition="('$(_BuildBranch)' != 'CodeGen') and ('$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64')" /> + <ProjectFile Include="jit64\dirs.proj" /> <ProjectFile Include="tools\dirs.proj" /> diff --git a/src/dlls/mscoree/mscoree.settings.targets b/src/dlls/mscoree/mscoree.settings.targets index b82f1f51aa..70262350d4 100644 --- a/src/dlls/mscoree/mscoree.settings.targets +++ b/src/dlls/mscoree/mscoree.settings.targets @@ -20,7 +20,7 @@ <LinkAdditionalOptions>$(LinkAdditionalOptions) /NXCOMPAT</LinkAdditionalOptions> <!-- /NOVCFEATURE forces linker to emit old .pdb format. 
It is required for scan.exe tool to work --> <LinkAdditionalOptions Condition="'$(BuildType)' == 'Checked' and '$(UseCoreToolset)' != 'true'">$(LinkAdditionalOptions) /NOVCFEATURE</LinkAdditionalOptions> - <LinkGenerateManifest Condition="'$(BuildForCoreSystem)' == 'true'">false</LinkGenerateManifest> + <LinkGenerateManifest Condition="'$(BuildForCoreSystem)' == 'true'">false</LinkGenerateManifest> <CDefines>$(CDefines);UNICODE;_UNICODE</CDefines> <PCHHeader>stdafx.h</PCHHeader> <EnableCxxPCHHeaders>true</EnableCxxPCHHeaders> @@ -91,6 +91,17 @@ <ImportLib Condition="'$(UseMsvcrt)'!='true' and '$(DebugBuild)' == 'true' and '$(BuildForCoreSystem)' != 'true'" Include="$(CrtLibPath)\libcpmtd.lib" /> </ItemGroup> + <ItemGroup Condition="'$(FeatureMergeJitAndEngine)' == 'true'"> + <ImportLib Include="$(ClrLibPath)\clrjit.lib" /> + + <!-- In the CodeGen branch, we use RyuJIT for all JIT builds --> + <ProjectReference Condition="'$(_BuildBranch)' == 'CodeGen'" Include="$(ClrSrcDirectory)jit\dll\jit.nativeproj" /> + + <!-- In other branches, we build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 --> + <ProjectReference Condition="'$(_BuildBranch)' != 'CodeGen' and ('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm64')" Include="$(ClrSrcDirectory)jit\dll\jit.nativeproj" /> + <ProjectReference Condition="'$(_BuildBranch)' != 'CodeGen' and ('$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64')" Include="$(ClrSrcDirectory)jit32\dll\jit.nativeproj" /> + </ItemGroup> + <ItemGroup> <ImportLib Include="$(ClrLibPath)\cee_wks.lib"> <ProjectReference>$(ClrSrcDirectory)vm\wks\wks.nativeproj</ProjectReference> @@ -102,14 +113,6 @@ </ImportLib> <LinkPreCrtLibs Include="$(ClrLibPath)\utilcode.lib" /> - <ImportLib Condition="'$(FeatureMergeJitAndEngine)'=='true' and ('$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64')" Include="$(ClrLibPath)\clrjit.lib"> - <ProjectReference>$(ClrSrcDirectory)jit32\dll\jit.nativeproj</ProjectReference> - </ImportLib> - - <ImportLib Condition="'$(FeatureMergeJitAndEngine)'=='true' and ('$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm64') and '$(BuildProjectName)' == 'CoreSys'" Include="$(ClrLibPath)\clrjit.lib"> - <ProjectReference>$(ClrSrcDirectory)jit\dll\jit.nativeproj</ProjectReference> - </ImportLib> - <ImportLib Include="$(ClrLibPath)\ildbsymlib.lib"> <ProjectReference>$(ClrSrcDirectory)debug\ildbsymlib\HostLocal\ildbsymlib.nativeproj</ProjectReference> </ImportLib> diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp index 873d336010..c031727534 100644 --- a/src/gc/gc.cpp +++ b/src/gc/gc.cpp @@ -23,29 +23,6 @@ #define USE_INTROSORT -// defines for ETW events. 
-#define ETW_TYPE_GC_MARK_1 21 // after marking stack roots -#define ETW_TYPE_GC_MARK_2 22 // after marking finalize queue roots -#define ETW_TYPE_GC_MARK_3 23 // after marking handles -#define ETW_TYPE_GC_MARK_4 24 // after marking cards - -#define ETW_TYPE_BGC_BEGIN 25 -#define ETW_TYPE_BGC_1ST_NONCON_END 26 -#define ETW_TYPE_BGC_1ST_CON_END 27 -#define ETW_TYPE_BGC_2ND_NONCON_BEGIN 28 -#define ETW_TYPE_BGC_2ND_NONCON_END 29 -#define ETW_TYPE_BGC_2ND_CON_BEGIN 30 -#define ETW_TYPE_BGC_2ND_CON_END 31 -#define ETW_TYPE_BGC_PLAN_END 32 -#define ETW_TYPE_BGC_SWEEP_END 33 - -#define ETW_TYPE_BGC_DRAIN_MARK_LIST 34 -#define ETW_TYPE_BGC_REVISIT 35 -#define ETW_TYPE_BGC_OVERFLOW 36 - -#define ETW_TYPE_ALLOC_WAIT_BEGIN 37 -#define ETW_TYPE_ALLOC_WAIT_END 38 - #if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE) inline BOOL ShouldTrackMovementForProfilerOrEtw() { @@ -133,7 +110,17 @@ static const char* const str_gc_reasons[] = "oos_loh", "induced_noforce", "gcstress", - "induced_lowmem" + "induced_lowmem", + "induced_compacting" +}; + +static const char* const str_gc_pause_modes[] = +{ + "batch", + "interactive", + "low_latency", + "sustained_low_latency", + "no_gc" }; #endif // defined(DT_LOG) || defined(TRACE_GC) @@ -584,13 +571,26 @@ struct join_structure VOLATILE(BOOL) wait_done; }; -typedef enum _join_type { - type_last_join, type_join, type_restart -} join_type; +enum join_type +{ + type_last_join = 0, + type_join = 1, + type_restart = 2, + type_first_r_join = 3, + type_r_join = 4 +}; + +enum join_time +{ + time_start = 0, + time_end = 1 +}; -typedef enum _join_time { - time_start, time_end -} join_time; +enum join_heap_index +{ + join_heap_restart = 100, + join_heap_r_restart = 200 +}; struct join_event { @@ -664,9 +664,9 @@ public: } } - inline void fire_event (ULONG heap, join_time time, join_type type) + inline void fire_event (int heap, join_time time, join_type type, int join_id) { - FireEtwGCJoin_V1(heap, time, type, GetClrInstanceId()); + FireEtwGCJoin_V2(heap, time, type, GetClrInstanceId(), join_id); } void join (gc_heap* gch, int join_id) @@ -684,7 +684,7 @@ public: dprintf (JOIN_LOG, ("join%d(%d): Join() Waiting...join_lock is now %d", flavor, join_id, (LONG)(join_struct.join_lock))); - fire_event (gch->heap_number, time_start, type_join); + fire_event (gch->heap_number, time_start, type_join, join_id); //busy wait around the color if (color == join_struct.lock_color) @@ -728,7 +728,7 @@ respin: flavor, join_id, (LONG)(join_struct.join_lock))); } - fire_event (gch->heap_number, time_end, type_join); + fire_event (gch->heap_number, time_end, type_join, join_id); // last thread out should reset event if (FastInterlockDecrement(&join_struct.join_restart) == 0) @@ -746,7 +746,7 @@ respin: } else { - fire_event (gch->heap_number, time_start, type_last_join); + fire_event (gch->heap_number, time_start, type_last_join, join_id); join_struct.joined_p = TRUE; dprintf (JOIN_LOG, ("join%d(%d): Last thread to complete the join, setting id", flavor, join_id)); @@ -785,7 +785,7 @@ respin: { dprintf (JOIN_LOG, ("r_join() Waiting...")); - fire_event (gch->heap_number, time_start, type_join); + fire_event (gch->heap_number, time_start, type_join, join_id); //busy wait around the color if (!join_struct.wait_done) @@ -822,7 +822,7 @@ respin: dprintf (JOIN_LOG, ("r_join() done")); } - fire_event (gch->heap_number, time_end, type_join); + fire_event (gch->heap_number, time_end, type_join, join_id); #ifdef JOIN_STATS // parallel execution starts here @@ -835,6 +835,7 @@ respin: } else { + 
fire_event (gch->heap_number, time_start, type_first_r_join, join_id); return TRUE; } } @@ -877,7 +878,7 @@ respin: } #endif //JOIN_STATS - fire_event (100, time_start, type_restart); + fire_event (join_heap_restart, time_start, type_restart, -1); assert (join_struct.joined_p); join_struct.joined_p = FALSE; join_struct.join_lock = join_struct.n_threads; @@ -889,7 +890,7 @@ respin: // printf("Set joined_event %d\n", !join_struct.lock_color); - fire_event (100, time_end, type_restart); + fire_event (join_heap_restart, time_end, type_restart, -1); #ifdef JOIN_STATS start[thd] = GetCycleCount32(); @@ -906,8 +907,10 @@ respin: { if (join_struct.n_threads != 1) { + fire_event (join_heap_r_restart, time_start, type_restart, -1); join_struct.wait_done = TRUE; join_struct.joined_event[first_thread_arrived].Set(); + fire_event (join_heap_r_restart, time_end, type_restart, -1); } } @@ -2588,6 +2591,7 @@ alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST-1]; dynamic_data gc_heap::dynamic_data_table [NUMBERGENERATIONS+1]; gc_history_per_heap gc_heap::gc_data_per_heap; +size_t gc_heap::maxgen_pinned_compact_before_advance = 0; SPTR_IMPL_NS_INIT(BYTE, WKS, gc_heap, alloc_allocated, 0); @@ -2634,7 +2638,7 @@ SPTR_IMPL_NS_INIT(CFinalize, WKS, gc_heap, finalize_queue, 0); void gen_to_condemn_tuning::print (int heap_num) { #ifdef DT_LOG - dprintf (DT_LOG_0, ("condemned reasons")); + dprintf (DT_LOG_0, ("condemned reasons (%d %d)", condemn_reasons_gen, condemn_reasons_condition)); dprintf (DT_LOG_0, ("%s", record_condemn_reasons_gen_header)); gc_condemn_reason_gen r_gen; for (int i = 0; i < gcrg_max; i++) @@ -2658,29 +2662,35 @@ void gen_to_condemn_tuning::print (int heap_num) void gc_generation_data::print (int heap_num, int gen_num) { -#ifdef SIMPLE_DPRINTF -#ifdef DT_LOG - dprintf (DT_LOG_0, ("[%2d]gen%d beg %Id fl %Id fo %Id end %Id fl %Id fo %Id in %Id out %Id surv %Id alloc %Id", +#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) + dprintf (DT_LOG_0, ("[%2d]gen%d beg %Id fl %Id fo %Id end %Id fl %Id fo %Id in %Id p %Id np %Id alloc %Id", heap_num, gen_num, size_before, free_list_space_before, free_obj_space_before, size_after, free_list_space_after, free_obj_space_after, - in, out, - surv, + in, pinned_surv, npinned_surv, new_allocation)); -#endif //DT_LOG -#endif //SIMPLE_DPRINTF +#endif //SIMPLE_DPRINTF && DT_LOG } -void gc_history_per_heap::print (int heap_num) +void gc_history_per_heap::print() { -#ifdef DT_LOG +#if defined(SIMPLE_DPRINTF) && defined(DT_LOG) for (int i = 0; i < (sizeof (gen_data)/sizeof (gc_generation_data)); i++) { - gen_data[i].print (heap_num, i); + gen_data[i].print (heap_index, i); } - dprintf (DT_LOG_0, ("[%2d]mp %d", heap_num, mem_pressure)); + + dprintf (DT_LOG_0, ("fla %Id flr %Id esa %Id ca %Id pa %Id paa %Id, rfle %d, ec %Id", + maxgen_size_info.free_list_allocated, + maxgen_size_info.free_list_rejected, + maxgen_size_info.end_seg_allocated, + maxgen_size_info.condemned_allocated, + maxgen_size_info.pinned_allocated, + maxgen_size_info.pinned_allocated_advance, + maxgen_size_info.running_free_list_efficiency, + extra_gen0_committed)); int mechanism = 0; gc_mechanism_descr* descr = 0; @@ -2693,12 +2703,12 @@ void gc_history_per_heap::print (int heap_num) { descr = &gc_mechanisms_descr[(gc_mechanism_per_heap)i]; dprintf (DT_LOG_0, ("[%2d]%s%s", - heap_num, + heap_index, descr->name, (descr->descr)[mechanism])); } } -#endif //DT_LOG +#endif //SIMPLE_DPRINTF && DT_LOG } void gc_history_global::print() @@ -2713,44 +2723,69 @@ void gc_history_global::print() str_settings[i * 2] = 
(get_mechanism_p ((gc_global_mechanism_p)i) ? 'Y' : 'N'); } - dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|")); + dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|e|")); dprintf (DT_LOG_0, ("%4d|%s", num_heaps, str_settings)); - dprintf (DT_LOG_0, ("Condemned gen%d(%s), youngest budget %Id(%d)", + dprintf (DT_LOG_0, ("Condemned gen%d(reason: %s; mode: %s), youngest budget %Id(%d), memload %d", condemned_generation, str_gc_reasons[reason], + str_gc_pause_modes[pause_mode], final_youngest_desired, - gen0_reduction_count)); + gen0_reduction_count, + mem_pressure)); #endif //DT_LOG } +void gc_heap::fire_per_heap_hist_event (gc_history_per_heap* current_gc_data_per_heap, int heap_num) +{ + maxgen_size_increase* maxgen_size_info = &(current_gc_data_per_heap->maxgen_size_info); + FireEtwGCPerHeapHistory_V3(GetClrInstanceId(), + (BYTE*)(maxgen_size_info->free_list_allocated), + (BYTE*)(maxgen_size_info->free_list_rejected), + (BYTE*)(maxgen_size_info->end_seg_allocated), + (BYTE*)(maxgen_size_info->condemned_allocated), + (BYTE*)(maxgen_size_info->pinned_allocated), + (BYTE*)(maxgen_size_info->pinned_allocated_advance), + maxgen_size_info->running_free_list_efficiency, + current_gc_data_per_heap->gen_to_condemn_reasons.get_reasons0(), + current_gc_data_per_heap->gen_to_condemn_reasons.get_reasons1(), + current_gc_data_per_heap->mechanisms[gc_compact], + current_gc_data_per_heap->mechanisms[gc_heap_expand], + current_gc_data_per_heap->heap_index, + (BYTE*)(current_gc_data_per_heap->extra_gen0_committed), + (max_generation + 2), + sizeof (gc_generation_data), + &(current_gc_data_per_heap->gen_data[0])); + + current_gc_data_per_heap->print(); + current_gc_data_per_heap->gen_to_condemn_reasons.print (heap_num); +} + void gc_heap::fire_pevents() { #ifndef CORECLR settings.record (&gc_data_global); gc_data_global.print(); - FireEtwGCGlobalHeapHistory_V1(gc_data_global.final_youngest_desired, + FireEtwGCGlobalHeapHistory_V2(gc_data_global.final_youngest_desired, gc_data_global.num_heaps, gc_data_global.condemned_generation, gc_data_global.gen0_reduction_count, gc_data_global.reason, gc_data_global.global_mechanims_p, - GetClrInstanceId()); + GetClrInstanceId(), + gc_data_global.pause_mode, + gc_data_global.mem_pressure); #ifdef MULTIPLE_HEAPS for (int i = 0; i < gc_heap::n_heaps; i++) { gc_heap* hp = gc_heap::g_heaps[i]; gc_history_per_heap* current_gc_data_per_heap = hp->get_gc_data_per_heap(); - current_gc_data_per_heap->print (i); - current_gc_data_per_heap->gen_to_condemn_reasons.print (i); - FireEtwGCPerHeapHistorySpecial(*current_gc_data_per_heap, sizeof(hp->gc_data_per_heap), (UINT8)GetClrInstanceId()); + fire_per_heap_hist_event (current_gc_data_per_heap, hp->heap_number); } #else gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); - FireEtwGCPerHeapHistorySpecial(*current_gc_data_per_heap, sizeof(gc_data_per_heap), (UINT8)GetClrInstanceId()); - current_gc_data_per_heap->print (0); - current_gc_data_per_heap->gen_to_condemn_reasons.print (heap_number); + fire_per_heap_hist_event (current_gc_data_per_heap, heap_number); #endif #endif //!CORECLR } @@ -4159,10 +4194,15 @@ static size_t get_valid_segment_size (BOOL large_seg=FALSE) } #ifdef MULTIPLE_HEAPS - if (g_SystemInfo.dwNumberOfProcessors > 4) - initial_seg_size /= 2; - if (g_SystemInfo.dwNumberOfProcessors > 8) - initial_seg_size /= 2; +#ifdef _WIN64 + if (!large_seg) +#endif //_WIN64 + { + if (g_SystemInfo.dwNumberOfProcessors > 4) + initial_seg_size /= 2; + if (g_SystemInfo.dwNumberOfProcessors > 8) + initial_seg_size /= 2; + } #endif 
//MULTIPLE_HEAPS // if seg_size is small but not 0 (0 is default if config not set) @@ -5502,6 +5542,8 @@ void gc_mechanisms::record (gc_history_global* history) history->condemned_generation = condemned_generation; history->gen0_reduction_count = gen0_reduction_count; history->reason = reason; + history->pause_mode = (int)pause_mode; + history->mem_pressure = entry_memory_load; history->global_mechanims_p = 0; // start setting the boolean values. @@ -6389,7 +6431,7 @@ BYTE*& card_table_lowest_address (DWORD* c_table) DWORD* translate_card_table (DWORD* ct) { - return (DWORD*)((BYTE*)ct - size_card_of (0, card_table_lowest_address( ct))); + return (DWORD*)((BYTE*)ct - card_word (gcard_of (card_table_lowest_address (ct))) * sizeof(DWORD)); } inline @@ -6805,15 +6847,29 @@ int gc_heap::grow_brick_card_tables (BYTE* start, GetProcessMemoryLoad (&st); BYTE* top = (BYTE*)0 + Align ((size_t)(st.ullTotalVirtual)); size_t ps = ha-la; - BYTE* highest = max ((saved_g_lowest_address + 2*ps), saved_g_highest_address); - //BYTE* highest = saved_g_highest_address; - if (highest > top) +#ifdef _WIN64 + if (ps > (ULONGLONG)200*1024*1024*1024) + ps += (ULONGLONG)100*1024*1024*1024; + else +#endif //_WIN64 + ps *= 2; + + if (saved_g_lowest_address < g_lowest_address) { - highest = top; + if (ps > (size_t)g_lowest_address) + saved_g_lowest_address = (BYTE*)OS_PAGE_SIZE; + else + { + assert (((size_t)g_lowest_address - ps) >= OS_PAGE_SIZE); + saved_g_lowest_address = min (saved_g_lowest_address, (g_lowest_address - ps)); + } } - if (highest > saved_g_highest_address) + + if (saved_g_highest_address > g_highest_address) { - saved_g_highest_address = highest; + saved_g_highest_address = max ((saved_g_lowest_address + ps), saved_g_highest_address); + if (saved_g_highest_address > top) + saved_g_highest_address = top; } } dprintf (GC_TABLE_LOG, ("Growing card table [%Ix, %Ix[", @@ -10624,6 +10680,15 @@ alloc_list& allocator::alloc_list_of (unsigned int bn) return buckets [bn-1]; } +size_t& allocator::alloc_list_damage_count_of (unsigned int bn) +{ + assert (bn < num_buckets); + if (bn == 0) + return first_bucket.alloc_list_damage_count(); + else + return buckets [bn-1].alloc_list_damage_count(); +} + void allocator::unlink_item (unsigned int bn, BYTE* item, BYTE* prev_item, BOOL use_undo_p) { //unlink the free_item @@ -10632,7 +10697,9 @@ void allocator::unlink_item (unsigned int bn, BYTE* item, BYTE* prev_item, BOOL { if (use_undo_p && (free_list_undo (prev_item) == UNDO_EMPTY)) { + assert (item == free_list_slot (prev_item)); free_list_undo (prev_item) = item; + alloc_list_damage_count_of (bn)++; } free_list_slot (prev_item) = free_list_slot(item); } @@ -10748,17 +10815,21 @@ void allocator::copy_from_alloc_list (alloc_list* fromalist) //new items may have been added during the plan phase //items may have been unlinked. BYTE* free_item = alloc_list_head_of (i); - while (free_item) + size_t count = alloc_list_damage_count_of (i); + while (free_item && count) { assert (((CObjectHeader*)free_item)->IsFree()); if ((free_list_undo (free_item) != UNDO_EMPTY)) { + count--; free_list_slot (free_item) = free_list_undo (free_item); free_list_undo (free_item) = UNDO_EMPTY; } free_item = free_list_slot (free_item); } + + alloc_list_damage_count_of (i) = 0; } #ifdef DEBUG BYTE* tail_item = alloc_list_tail_of (i); @@ -10776,19 +10847,28 @@ void allocator::commit_alloc_list_changes() { //remove the undo info from list. 
BYTE* free_item = alloc_list_head_of (i); - while (free_item) + size_t count = alloc_list_damage_count_of (i); + while (free_item && count) { assert (((CObjectHeader*)free_item)->IsFree()); - free_list_undo (free_item) = UNDO_EMPTY; + + if (free_list_undo (free_item) != UNDO_EMPTY) + { + free_list_undo (free_item) = UNDO_EMPTY; + count--; + } + free_item = free_list_slot (free_item); } + + alloc_list_damage_count_of (i) = 0; } } } void gc_heap::adjust_limit_clr (BYTE* start, size_t limit_size, alloc_context* acontext, heap_segment* seg, - int align_const) + int align_const, int gen_number) { //probably should pass seg==0 for free lists. if (seg) @@ -10809,7 +10889,9 @@ void gc_heap::adjust_limit_clr (BYTE* start, size_t limit_size, dprintf (3, ("filling up hole [%Ix, %Ix[", (size_t)hole, (size_t)hole + size + Align (min_obj_size, align_const))); // when we are finishing an allocation from a free list // we know that the free area was Align(min_obj_size) larger - make_unused_array (hole, size + Align (min_obj_size, align_const)); + size_t free_obj_size = size + Align (min_obj_size, align_const); + make_unused_array (hole, free_obj_size); + generation_free_obj_space (generation_of (gen_number)) += free_obj_size; } acontext->alloc_ptr = start; } @@ -11273,7 +11355,7 @@ BOOL gc_heap::a_fit_free_list_p (int gen_number, } generation_free_list_space (gen) -= limit; - adjust_limit_clr (free_list, limit, acontext, 0, align_const); + adjust_limit_clr (free_list, limit, acontext, 0, align_const, gen_number); can_fit = TRUE; goto end; @@ -11465,7 +11547,7 @@ BOOL gc_heap::a_fit_free_list_large_p (size_t size, else #endif //BACKGROUND_GC { - adjust_limit_clr (free_list, limit, acontext, 0, align_const); + adjust_limit_clr (free_list, limit, acontext, 0, align_const, gen_number); } //fix the limit to compensate for adjust_limit_clr making it too short @@ -11579,7 +11661,7 @@ found_fit: else #endif //BACKGROUND_GC { - adjust_limit_clr (old_alloc, limit, acontext, seg, align_const); + adjust_limit_clr (old_alloc, limit, acontext, seg, align_const, gen_number); } return TRUE; @@ -12110,7 +12192,7 @@ BOOL gc_heap::check_and_wait_for_bgc (alloc_wait_reason awr, { bgc_in_progress = TRUE; size_t last_full_compact_gc_count = get_full_compact_gc_count(); - wait_for_background (awr_loh_oos_bgc); + wait_for_background (awr); size_t current_full_compact_gc_count = get_full_compact_gc_count(); if (current_full_compact_gc_count > last_full_compact_gc_count) { @@ -12173,7 +12255,7 @@ BOOL gc_heap::trigger_full_compact_gc (gc_reason gr, #ifdef BACKGROUND_GC if (recursive_gc_sync::background_running_p()) { - wait_for_background (awr_loh_oos_bgc); + wait_for_background ((gr == reason_oos_soh) ? 
awr_gen0_oos_bgc : awr_loh_oos_bgc); dprintf (2, ("waited for BGC - done")); } #endif //BACKGROUND_GC @@ -13141,11 +13223,13 @@ BYTE* gc_heap::allocate_in_older_generation (generation* gen, size_t size, dprintf (3, ("considering free list %Ix", (size_t)free_list)); size_t free_list_size = unused_array_size (free_list); + if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + free_list_size), old_loc, USE_PADDING_TAIL | pad_in_front)) { dprintf (4, ("F:%Ix-%Id", (size_t)free_list, free_list_size)); + gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, !discard_p); generation_free_list_space (gen) -= free_list_size; remove_gen_free (gen->gen_num, free_list_size); @@ -13879,6 +13963,7 @@ int gc_heap::joined_generation_to_condemn (BOOL should_evaluate_elevation, else { n = max_generation - 1; + gc_data_global.set_mechanism_p (global_elevation); } } else @@ -14278,13 +14363,13 @@ int gc_heap::generation_to_condemn (int n_initial, dprintf (GTC_LOG, ("ml: %d", ms.dwMemoryLoad)); } -#ifdef _WIN64 if (heap_number == 0) { +#ifdef _WIN64 available_physical_mem = ms.ullAvailPhys; +#endif //_WIN64 local_settings->entry_memory_load = ms.dwMemoryLoad; } -#endif //_WIN64 // @TODO: Force compaction more often under GCSTRESS if (ms.dwMemoryLoad >= 90 || low_memory_detected) @@ -14394,6 +14479,7 @@ int gc_heap::generation_to_condemn (int n_initial, dprintf (GTC_LOG, ("%Id left in gen2 alloc (%Id)", dd_new_allocation (dd_max), dd_desired_allocation (dd_max))); n = max_generation; + local_condemn_reasons->set_condition (gen_almost_max_alloc); } } @@ -14521,7 +14607,6 @@ exit: if (check_memory) { - gc_data_per_heap.mem_pressure = ms.dwMemoryLoad; fgm_result.available_pagefile_mb = (size_t)(ms.ullAvailPageFile / (1024 * 1024)); } @@ -14844,7 +14929,8 @@ void gc_heap::gc1() if (n != max_generation) { - for (int gen_number = (n+1); gen_number <= (max_generation+1); gen_number++) + int gen_num_for_data = ((n < (max_generation - 1)) ? 
(n + 1) : (max_generation + 1)); + for (int gen_number = (n + 1); gen_number <= gen_num_for_data; gen_number++) { gc_data_per_heap.gen_data[gen_number].size_after = generation_size (gen_number); gc_data_per_heap.gen_data[gen_number].free_list_space_after = generation_free_list_space (generation_of (gen_number)); @@ -14852,6 +14938,8 @@ void gc_heap::gc1() } } + gc_data_per_heap.maxgen_size_info.running_free_list_efficiency = (DWORD)(generation_allocator_efficiency (generation_of (max_generation)) * 100); + free_list_info (max_generation, "after computing new dynamic data"); if (heap_number == 0) @@ -14860,7 +14948,7 @@ void gc_heap::gc1() dd_collection_count (dynamic_data_of (0)), settings.condemned_generation, dd_gc_elapsed_time (dynamic_data_of (0)))); - } + } for (int gen_number = 0; gen_number <= (max_generation + 1); gen_number++) { @@ -14872,6 +14960,7 @@ void gc_heap::gc1() if (n < max_generation) { compute_promoted_allocation (1 + n); + dynamic_data* dd = dynamic_data_of (1 + n); size_t new_fragmentation = generation_free_list_space (generation_of (1 + n)) + generation_free_obj_space (generation_of (1 + n)); @@ -15912,6 +16001,8 @@ int gc_heap::garbage_collect (int n) memset (&gc_data_per_heap, 0, sizeof (gc_data_per_heap)); gc_data_per_heap.heap_index = heap_number; + if (heap_number == 0) + memset (&gc_data_global, 0, sizeof (gc_data_global)); memset (&fgm_result, 0, sizeof (fgm_result)); settings.reason = gc_trigger_reason; verify_pinned_queue_p = FALSE; @@ -16108,14 +16199,17 @@ int gc_heap::garbage_collect (int n) #endif //MULTIPLE_HEAPS } - for (int i = 0; i <= (max_generation+1); i++) { - gc_data_per_heap.gen_data[i].size_before = generation_size (i); - generation* gen = generation_of (i); - gc_data_per_heap.gen_data[i].free_list_space_before = generation_free_list_space (gen); - gc_data_per_heap.gen_data[i].free_obj_space_before = generation_free_obj_space (gen); + int gen_num_for_data = ((settings.condemned_generation < (max_generation - 1)) ? + (settings.condemned_generation + 1) : (max_generation + 1)); + for (int i = 0; i <= gen_num_for_data; i++) + { + gc_data_per_heap.gen_data[i].size_before = generation_size (i); + generation* gen = generation_of (i); + gc_data_per_heap.gen_data[i].free_list_space_before = generation_free_list_space (gen); + gc_data_per_heap.gen_data[i].free_obj_space_before = generation_free_obj_space (gen); + } } - descr_generations (TRUE); // descr_card_table(); @@ -18523,9 +18617,16 @@ size_t gc_heap::get_total_heap_size() return total_heap_size; } +void fire_mark_event (int heap_num, int root_type, size_t bytes_marked) +{ + dprintf (DT_LOG_0, ("-----------[%d]mark %d: %Id", heap_num, root_type, bytes_marked)); + FireEtwGCMarkWithType (heap_num, GetClrInstanceId(), root_type, bytes_marked); +} + //returns TRUE is an overflow happened. BOOL gc_heap::process_mark_overflow(int condemned_gen_number) { + size_t last_promoted_bytes = promoted_bytes (heap_number); BOOL overflow_p = FALSE; recheck: if ((! 
(max_overflow_address == 0) || @@ -18563,6 +18664,10 @@ recheck: goto recheck; } + size_t current_promoted_bytes = promoted_bytes (heap_number); + + if (current_promoted_bytes != last_promoted_bytes) + fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_OVERFLOW, (current_promoted_bytes - last_promoted_bytes)); return overflow_p; } @@ -18635,37 +18740,6 @@ void gc_heap::process_mark_overflow_internal (int condemned_gen_number, } } -inline -void fire_mark_event (int heap_num, int mark_num) -{ - switch(mark_num) - { - case ETW_TYPE_GC_MARK_1: - FireEtwGCMarkStackRoots(heap_num, GetClrInstanceId()); - FireEtwPrvGCMarkStackRoots_V1(heap_num, GetClrInstanceId()); - break; - - case ETW_TYPE_GC_MARK_2: - FireEtwGCMarkFinalizeQueueRoots(heap_num, GetClrInstanceId()); - FireEtwPrvGCMarkFinalizeQueueRoots_V1(heap_num, GetClrInstanceId()); - break; - - case ETW_TYPE_GC_MARK_3: - FireEtwGCMarkHandles(heap_num, GetClrInstanceId()); - FireEtwPrvGCMarkHandles_V1(heap_num, GetClrInstanceId()); - break; - - case ETW_TYPE_GC_MARK_4: - FireEtwGCMarkOlderGenerationRoots(heap_num, GetClrInstanceId()); - FireEtwPrvGCMarkCards_V1(heap_num, GetClrInstanceId()); - break; - - default: - _ASSERTE(mark_num==ETW_TYPE_GC_MARK_1 || mark_num==ETW_TYPE_GC_MARK_2 || mark_num==ETW_TYPE_GC_MARK_3 || mark_num==ETW_TYPE_GC_MARK_4); - break; - } -} - // Scanning for promotion for dependent handles need special handling. Because the primary holds a strong // reference to the secondary (when the primary itself is reachable) and this can cause a cascading series of // promotions (the secondary of one handle is or promotes the primary of another) we might need to perform the @@ -18887,6 +18961,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) gen0_must_clear_bricks--; #endif //FFIND_OBJECT + size_t last_promoted_bytes = 0; + promoted_bytes (heap_number) = 0; reset_mark_stack(); @@ -18976,6 +19052,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) if ((condemned_gen_number == max_generation) && (num_sizedrefs > 0)) { CNameSpace::GcScanSizedRefs(GCHeap::Promote, condemned_gen_number, max_generation, &sc); + fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_SIZEDREF, (promoted_bytes (heap_number) - last_promoted_bytes)); + last_promoted_bytes = promoted_bytes (heap_number); #ifdef MULTIPLE_HEAPS gc_t_join.join(this, gc_join_scan_sizedref_done); @@ -18993,7 +19071,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) condemned_gen_number, max_generation, &sc); - fire_mark_event (heap_number, ETW_TYPE_GC_MARK_1); + fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_STACK, (promoted_bytes (heap_number) - last_promoted_bytes)); + last_promoted_bytes = promoted_bytes (heap_number); #ifdef BACKGROUND_GC if (recursive_gc_sync::background_running_p()) @@ -19007,7 +19086,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) finalize_queue->GcScanRoots(GCHeap::Promote, heap_number, 0); #endif // FEATURE_PREMORTEM_FINALIZATION - fire_mark_event (heap_number, ETW_TYPE_GC_MARK_2); + fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_FQ, (promoted_bytes (heap_number) - last_promoted_bytes)); + last_promoted_bytes = promoted_bytes (heap_number); // MTHTS { @@ -19016,7 +19096,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) CNameSpace::GcScanHandles(GCHeap::Promote, condemned_gen_number, max_generation, &sc); - fire_mark_event (heap_number, ETW_TYPE_GC_MARK_3); + fire_mark_event 
(heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_HANDLES, (promoted_bytes (heap_number) - last_promoted_bytes)); + last_promoted_bytes = promoted_bytes (heap_number); } #ifdef TRACE_GC @@ -19060,7 +19141,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) dprintf (3, ("marked by cards: %Id", (promoted_bytes (heap_number) - promoted_before_cards))); - fire_mark_event (heap_number, ETW_TYPE_GC_MARK_4); + fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_OLDER, (promoted_bytes (heap_number) - last_promoted_bytes)); + last_promoted_bytes = promoted_bytes (heap_number); } } @@ -19659,7 +19741,7 @@ size_t gc_heap::update_brick_table (BYTE* tree, size_t current_brick, void gc_heap::plan_generation_start (generation* gen, generation* consing_gen, BYTE* next_plug_to_allocate) { #ifdef _WIN64 - // We should never demote big plugs to ephemeral generations. + // We should never demote big plugs to gen0. if (gen == youngest_generation) { heap_segment* seg = ephemeral_heap_segment; @@ -19928,9 +20010,11 @@ retry: if (active_new_gen_number != max_generation) { - if ((active_new_gen_number == (max_generation - 1)) && !demote_gen1_p) + if (active_new_gen_number == (max_generation - 1)) { - advance_pins_for_demotion (consing_gen); + maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); + if (!demote_gen1_p) + advance_pins_for_demotion (consing_gen); } plan_generation_start (generation_of (active_new_gen_number), consing_gen, x); @@ -21294,6 +21378,7 @@ void gc_heap::plan_phase (int condemned_gen_number) else { allocate_in_condemned = TRUE; + new_address = allocate_in_condemned_generations (consing_gen, ps, active_old_gen_number, #ifdef SHORT_PLUGS &convert_to_pinned_p, @@ -21514,9 +21599,11 @@ void gc_heap::plan_phase (int condemned_gen_number) { active_new_gen_number--; - if ((active_new_gen_number == (max_generation - 1)) && !demote_gen1_p) + if (active_new_gen_number == (max_generation - 1)) { - advance_pins_for_demotion (consing_gen); + maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)); + if (!demote_gen1_p) + advance_pins_for_demotion (consing_gen); } generation* gen = generation_of (active_new_gen_number); @@ -21628,14 +21715,11 @@ void gc_heap::plan_phase (int condemned_gen_number) #endif //SIMPLE_DPRINTF } -#ifdef FREE_USAGE_STATS if (settings.condemned_generation == (max_generation - 1 )) { size_t plan_gen2_size = generation_plan_size (max_generation); size_t growth = plan_gen2_size - old_gen2_size; - dprintf (1, ("gen2's FL effi: %d", (int)(generation_allocator_efficiency (generation_of (max_generation)) * 100))); - if (growth > 0) { dprintf (1, ("gen2 grew %Id (end seg alloc: %Id, gen1 c alloc: %Id", @@ -21649,9 +21733,11 @@ void gc_heap::plan_phase (int condemned_gen_number) generation_condemned_allocated (generation_of (max_generation - 1)))); } - generation* older_gen = generation_of (settings.condemned_generation + 1); + generation* older_gen = generation_of (settings.condemned_generation + 1); size_t rejected_free_space = generation_free_obj_space (older_gen) - r_free_obj_space; - size_t free_allocated = generation_free_list_allocated (older_gen) - r_older_gen_free_list_allocated; + size_t free_list_allocated = generation_free_list_allocated (older_gen) - r_older_gen_free_list_allocated; + size_t end_seg_allocated = generation_end_seg_allocated (older_gen) - r_older_gen_end_seg_allocated; + size_t condemned_allocated = 
generation_condemned_allocated (older_gen) - r_older_gen_condemned_allocated; dprintf (1, ("older gen's free alloc: %Id->%Id, seg alloc: %Id->%Id, condemned alloc: %Id->%Id", r_older_gen_free_list_allocated, generation_free_list_allocated (older_gen), @@ -21659,30 +21745,27 @@ void gc_heap::plan_phase (int condemned_gen_number) r_older_gen_condemned_allocated, generation_condemned_allocated (older_gen))); dprintf (1, ("this GC did %Id free list alloc(%Id bytes free space rejected), %Id seg alloc and %Id condemned alloc, gen1 condemned alloc is %Id", - free_allocated, - rejected_free_space, - (generation_end_seg_allocated (older_gen) - r_older_gen_end_seg_allocated), - (generation_condemned_allocated (older_gen) - r_older_gen_condemned_allocated), - generation_condemned_allocated (generation_of (settings.condemned_generation)))); + free_list_allocated, rejected_free_space, end_seg_allocated, + condemned_allocated, generation_condemned_allocated (generation_of (settings.condemned_generation)))); - float running_free_list_efficiency = 0; - if ((free_allocated + rejected_free_space) != 0) - { - running_free_list_efficiency = (float) (free_allocated) / (float)(free_allocated + rejected_free_space); - } + maxgen_size_increase* maxgen_size_info = &(gc_data_per_heap.maxgen_size_info); + maxgen_size_info->free_list_allocated = free_list_allocated; + maxgen_size_info->free_list_rejected = rejected_free_space; + maxgen_size_info->end_seg_allocated = end_seg_allocated; + maxgen_size_info->condemned_allocated = condemned_allocated; + maxgen_size_info->pinned_allocated = maxgen_pinned_compact_before_advance; + maxgen_size_info->pinned_allocated_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)) - maxgen_pinned_compact_before_advance; - float free_list_efficiency = 0; - if ((generation_free_list_allocated (older_gen) + generation_free_obj_space (older_gen)) != 0) - { - free_list_efficiency = - (float) (generation_free_list_allocated (older_gen)) / (float)(generation_free_list_allocated (older_gen) + generation_free_obj_space (older_gen)); - } +#ifdef FREE_USAGE_STATS + int free_list_efficiency = 0; + if ((free_list_allocated + rejected_free_space) != 0) + free_list_efficiency = (int)(((float) (free_list_allocated) / (float)(free_list_allocated + rejected_free_space)) * (float)100); + + int running_free_list_efficiency = (int)(generation_allocator_efficiency(older_gen)*100); - dprintf (1, ("gen%d running free list alloc effi: %d%%(%d%%), current effi: %d%%", + dprintf (1, ("gen%d free list alloc effi: %d%%, current effi: %d%%", older_gen->gen_num, - (int)(running_free_list_efficiency*100), - (int)(free_list_efficiency*100), - (int)(generation_allocator_efficiency(older_gen)*100))); + free_list_efficiency, running_free_list_efficiency)); dprintf (1, ("gen2 free list change")); for (int j = 0; j < NUM_GEN_POWER2; j++) @@ -21694,8 +21777,8 @@ void gc_heap::plan_phase (int condemned_gen_number) (SSIZE_T)(r_older_gen_free_space[j] - older_gen->gen_free_spaces[j]), (generation_of(max_generation - 1))->gen_plugs[j])); } - } #endif //FREE_USAGE_STATS + } size_t fragmentation = generation_fragmentation (generation_of (condemned_gen_number), @@ -29000,20 +29083,13 @@ size_t gc_heap::desired_new_allocation (dynamic_data* dd, if (dd_begin_data_size (dd) == 0) { size_t new_allocation = dd_default_new_allocation (dd); - current_gc_data_per_heap->gen_data[gen_number].new_allocation = new_allocation; - if ((gen_number == 0) && (pass == 1)) - { - 
current_gc_data_per_heap->gen_data[max_generation+2].new_allocation = new_allocation; - } - + current_gc_data_per_heap->gen_data[gen_number].new_allocation = new_allocation; return new_allocation; } else { float cst; size_t previous_desired_allocation = dd_desired_allocation (dd); - //ptrdiff_t allocation = (previous_desired_allocation - dd_gc_new_allocation (dd)); - ptrdiff_t allocation = (previous_desired_allocation - dd_new_allocation (dd)); size_t current_size = dd_current_size (dd); float max_limit = dd_max_limit (dd); float limit = dd_limit (dd); @@ -29128,23 +29204,18 @@ size_t gc_heap::desired_new_allocation (dynamic_data* dd, size_t new_allocation_ret = Align (new_allocation, get_alignment_constant (!(gen_number == (max_generation+1)))); int gen_data_index = gen_number; - if ((gen_number == 0) && (pass == 1)) - { - gen_data_index = max_generation+2; - } gc_generation_data* gen_data = &(current_gc_data_per_heap->gen_data[gen_data_index]); - gen_data->surv = (size_t)(cst*100); gen_data->new_allocation = new_allocation_ret; dd_surv (dd) = cst; #ifdef SIMPLE_DPRINTF dprintf (1, ("h%d g%d surv: %Id current: %Id alloc: %Id (%d%%) f: %d%% new-size: %Id new-alloc: %Id", - heap_number, gen_number, out, current_size, allocation, + heap_number, gen_number, out, current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)), (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation)); #else dprintf (1,("gen: %d in: %Id out: %Id ", gen_number, generation_allocation_size (generation_of (gen_number)), out)); - dprintf (1,("current: %Id alloc: %Id ", current_size, allocation)); + dprintf (1,("current: %Id alloc: %Id ", current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)))); dprintf (1,(" surv: %d%% f: %d%% new-size: %Id new-alloc: %Id", (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation)); #endif //SIMPLE_DPRINTF @@ -29251,6 +29322,10 @@ size_t gc_heap::compute_in (int gen_number) dd_gc_new_allocation (dd) -= in; + gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); + gc_generation_data* gen_data = &(current_gc_data_per_heap->gen_data[gen_number]); + gen_data->in = in; + generation_allocation_size (generation_of (gen_number)) = 0; return in; } @@ -29354,7 +29429,6 @@ void gc_heap::compute_new_dynamic_data (int gen_number) gen_data->size_after = total_gen_size; gen_data->free_list_space_after = generation_free_list_space (gen); gen_data->free_obj_space_after = generation_free_obj_space (gen); - gen_data->in = in; if ((settings.pause_mode == pause_low_latency) && (gen_number <= 1)) { @@ -29376,7 +29450,6 @@ void gc_heap::compute_new_dynamic_data (int gen_number) dprintf (2, ("gen: %d final promoted: %Id", gen_number, final_promoted)); dd_freach_previous_promotion (dd) = final_promoted; size_t lower_bound = desired_new_allocation (dd, out-final_promoted, gen_number, 0); - gen_data->out = out - final_promoted; if (settings.condemned_generation == 0) { @@ -29385,9 +29458,6 @@ void gc_heap::compute_new_dynamic_data (int gen_number) } else { - current_gc_data_per_heap->gen_data[max_generation+2] = *gen_data; - current_gc_data_per_heap->gen_data[max_generation+2].out = out; - size_t higher_bound = desired_new_allocation (dd, out, gen_number, 1); // <TODO>This assert was causing AppDomains\unload\test1n\test1nrun.bat to fail</TODO> @@ -29412,11 +29482,13 @@ void gc_heap::compute_new_dynamic_data (int gen_number) } else { - gen_data->out = out; dd_desired_allocation (dd) = desired_new_allocation (dd, out, gen_number, 
0); } } + gen_data->pinned_surv = dd_pinned_survived_size (dd); + gen_data->npinned_surv = dd_survived_size (dd) - dd_pinned_survived_size (dd); + dd_gc_new_allocation (dd) = dd_desired_allocation (dd); //update counter dd_promoted_size (dd) = out; @@ -29438,8 +29510,7 @@ void gc_heap::compute_new_dynamic_data (int gen_number) gen_data->size_after = total_gen_size; gen_data->free_list_space_after = generation_free_list_space (large_object_generation); gen_data->free_obj_space_after = generation_free_obj_space (large_object_generation); - gen_data->in = in; - gen_data->out = out; + gen_data->npinned_surv = out; #ifdef BACKGROUND_GC end_loh_size = total_gen_size; #endif //BACKGROUND_GC @@ -29529,7 +29600,7 @@ void gc_heap::decommit_ephemeral_segment_pages() decommit_heap_segment_pages (ephemeral_heap_segment, slack_space); gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); - current_gc_data_per_heap->extra_gen0_committed = (ULONGLONG)(heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment)); + current_gc_data_per_heap->extra_gen0_committed = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment); } size_t gc_heap::new_allocation_limit (size_t size, size_t free_size, int gen_number) @@ -30523,7 +30594,8 @@ void gc_heap::background_ephemeral_sweep() { generation* gen_to_reset = generation_of (i); assert (generation_free_list_space (gen_to_reset) == 0); - assert (generation_free_obj_space (gen_to_reset) == 0); + // Can only assert free_list_space is 0, not free_obj_space as the allocator could have added + // something there. } for (int i = (max_generation - 1); i >= 0; i--) @@ -34282,7 +34354,7 @@ GCHeap::GarbageCollectTry (int generation, BOOL low_memory_p, int mode) gc_reason reason = reason_empty; - if (low_memory_p ) + if (low_memory_p) { if (mode & collection_blocking) reason = reason_lowmemory_blocking; @@ -34494,8 +34566,12 @@ GCHeap::GarbageCollectGeneration (unsigned int gen, gc_reason reason) #endif //TRACE_GC gc_heap::g_low_memory_status = (reason == reason_lowmemory) || - (reason == reason_lowmemory_blocking) || - g_bLowMemoryFromHost; + (reason == reason_lowmemory_blocking) || + g_bLowMemoryFromHost; + + if (g_bLowMemoryFromHost) + reason = reason_lowmemory_host; + gc_trigger_reason = reason; #ifdef MULTIPLE_HEAPS diff --git a/src/gc/gcimpl.h b/src/gc/gcimpl.h index 3269abc82d..86de9cac1a 100644 --- a/src/gc/gcimpl.h +++ b/src/gc/gcimpl.h @@ -55,6 +55,7 @@ enum gc_reason reason_gcstress = 8, // this turns into reason_induced & gc_mechanisms.stress_induced = true reason_lowmemory_blocking = 9, reason_induced_compacting = 10, + reason_lowmemory_host = 11, reason_max }; diff --git a/src/gc/gcpriv.h b/src/gc/gcpriv.h index 9da73bc6f7..9736cecdab 100644 --- a/src/gc/gcpriv.h +++ b/src/gc/gcpriv.h @@ -582,9 +582,7 @@ public: BOOL stress_induced; #endif // STRESS_HEAP -#ifdef _WIN64 DWORD entry_memory_load; -#endif //_WIN64 void init_mechanisms(); //for each GC void first_init(); // for the life of the EE @@ -728,13 +726,17 @@ class alloc_list { BYTE* head; BYTE* tail; + size_t damage_count; + public: BYTE*& alloc_list_head () { return head;} BYTE*& alloc_list_tail () { return tail;} + size_t& alloc_list_damage_count(){ return damage_count; } alloc_list() { head = 0; tail = 0; + damage_count = 0; } }; @@ -746,6 +748,7 @@ class allocator alloc_list first_bucket; alloc_list* buckets; alloc_list& alloc_list_of (unsigned int bn); + size_t& alloc_list_damage_count_of (unsigned int bn); 
public: allocator (unsigned int num_b, size_t fbs, alloc_list* b); @@ -1212,6 +1215,9 @@ public: void verify_heap (BOOL begin_gc_p); #endif //VERIFY_HEAP + PER_HEAP_ISOLATED + void fire_per_heap_hist_event (gc_history_per_heap* current_gc_data_per_heap, int heap_num); + PER_HEAP_ISOLATED void fire_pevents(); @@ -1815,7 +1821,7 @@ protected: PER_HEAP void adjust_limit_clr (BYTE* start, size_t limit_size, alloc_context* acontext, heap_segment* seg, - int align_const); + int align_const, int gen_number); PER_HEAP void leave_allocation_segment (generation* gen); @@ -3422,6 +3428,9 @@ protected: PER_HEAP gc_history_per_heap gc_data_per_heap; + PER_HEAP + size_t maxgen_pinned_compact_before_advance; + // dynamic tuning. PER_HEAP BOOL dt_low_ephemeral_space_p (gc_tuning_point tp); diff --git a/src/gc/gcrecord.h b/src/gc/gcrecord.h index 1d2a2cdeef..2dbf8e8842 100644 --- a/src/gc/gcrecord.h +++ b/src/gc/gcrecord.h @@ -61,12 +61,13 @@ enum gc_condemn_reason_condition gen_gen2_too_small = 13, gen_induced_noforce_p = 14, gen_before_bgc = 15, - gcrc_max = 16 + gen_almost_max_alloc = 16, + gcrc_max = 17 }; #ifdef DT_LOG static char* record_condemn_reasons_gen_header = "[cg]i|f|a|t|"; -static char* record_condemn_reasons_condition_header = "[cc]i|e|h|v|l|l|e|m|m|m|m|g|o|s|n|b|"; +static char* record_condemn_reasons_condition_header = "[cc]i|e|h|v|l|l|e|m|m|m|m|g|o|s|n|b|a|"; static char char_gen_number[4] = {'0', '1', '2', '3'}; #endif //DT_LOG @@ -135,6 +136,16 @@ public: return value; } + DWORD get_reasons0() + { + return condemn_reasons_gen; + } + + DWORD get_reasons1() + { + return condemn_reasons_condition; + } + #ifdef DT_LOG char get_gen_char (DWORD value) { @@ -149,11 +160,9 @@ public: void print (int heap_num); }; -// *******IMPORTANT******* -// The data members in this class are specifically -// arranged in decending order by their sizes to guarantee no -// padding - this is important for recording the ETW event -// 'cause ETW stuff will not apply padding. +// Right now these are all size_t's but if you add a type that requires +// padding you should add a pragma pack here since I am firing this as +// a struct in an ETW event. struct gc_generation_data { // data recorded at the beginning of a GC @@ -166,16 +175,24 @@ struct gc_generation_data size_t free_list_space_after; size_t free_obj_space_after; size_t in; - size_t out; - - // The following data is calculated in - // desired_new_allocation. + size_t pinned_surv; + size_t npinned_surv; size_t new_allocation; - size_t surv; void print (int heap_num, int gen_num); }; +struct maxgen_size_increase +{ + size_t free_list_allocated; + size_t free_list_rejected; + size_t end_seg_allocated; + size_t condemned_allocated; + size_t pinned_allocated; + size_t pinned_allocated_advance; + DWORD running_free_list_efficiency; +}; + // The following indicates various mechanisms and one value // related to each one. Each value has its corresponding string // representation so if you change the enum's, make sure you @@ -249,31 +266,19 @@ static gc_mechanism_descr gc_mechanisms_descr[max_mechanism_per_heap] = {"expanded heap ", str_heap_expand_mechanisms}, {"compacted because of ", str_compact_reasons} }; - #endif //DT_LOG int index_of_set_bit (size_t power2); #define mechanism_mask (1 << (sizeof (DWORD) * 8 - 1)) // interesting per heap data we want to record for each GC. 
-// *******IMPORTANT******* -// The data members in this class are specifically -// arranged in decending order by their sizes to guarantee no -// padding - this is important for recording the ETW event -// 'cause ETW stuff will not apply padding. class gc_history_per_heap { public: - // The reason we use max_generation+3 is because when we are - // condemning 1+, we calculate generation 0 data twice and we'll - // store data from the 2nd pass in gen_data[max_generation+2]. - // For generations > condemned_gen, the values are all 0. - gc_generation_data gen_data[max_generation+3]; + gc_generation_data gen_data[max_generation+2]; + maxgen_size_increase maxgen_size_info; gen_to_condemn_tuning gen_to_condemn_reasons; - // if we got the memory pressure in generation_to_condemn, this - // will record that value; otherwise it's 0. - DWORD mem_pressure; // The mechanisms data is compacted in the following way: // most significant bit indicates if we did the operation. // the rest of the bits indicate the reason @@ -286,7 +291,7 @@ public: DWORD heap_index; - ULONGLONG extra_gen0_committed; + size_t extra_gen0_committed; void set_mechanism (gc_mechanism_per_heap mechanism_per_heap, DWORD value) { @@ -315,45 +320,9 @@ public: return -1; } - void print (int heap_num); + void print(); }; -#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_REDHAWK) - -#if !defined(ETW_INLINE) -#define ETW_INLINE DECLSPEC_NOINLINE __inline -#endif - -ETW_INLINE -ULONG -Etw_GCDataPerHeapSpecial( - __in PCEVENT_DESCRIPTOR Descriptor, - __in LPCGUID EventGuid, - __in gc_history_per_heap gc_data_per_heap, - __in ULONG datasize, - __in UINT8 ClrInstanceId) -{ - REGHANDLE RegHandle = Microsoft_Windows_DotNETRuntimePrivateHandle; -#define ARGUMENT_COUNT_GCDataPerHeapTemplate 2 - ULONG Error = ERROR_SUCCESS; -typedef struct _MCGEN_TRACE_BUFFER { - EVENT_TRACE_HEADER Header; - EVENT_DATA_DESCRIPTOR EventData[ARGUMENT_COUNT_GCDataPerHeapTemplate]; -} MCGEN_TRACE_BUFFER; - - MCGEN_TRACE_BUFFER TraceBuf; - PEVENT_DATA_DESCRIPTOR EventData = TraceBuf.EventData; - - EventDataDescCreate(&EventData[0], &gc_data_per_heap, datasize); - - EventDataDescCreate(&EventData[1], &ClrInstanceId, sizeof(ClrInstanceId)); - - return EventWrite(RegHandle, Descriptor, ARGUMENT_COUNT_GCDataPerHeapTemplate, EventData); -} - -#undef TraceEvent -#endif // FEATURE_EVENT_TRACE && !FEATURE_REDHAWK - // we store up to 32 boolean settings. enum gc_global_mechanism_p { @@ -362,14 +331,10 @@ enum gc_global_mechanism_p global_promotion, global_demotion, global_card_bundles, + global_elevation, max_global_mechanism }; -// *******IMPORTANT******* -// The data members in this class are specifically -// arranged in decending order by their sizes to guarantee no -// padding - this is important for recording the ETW event -// 'cause ETW stuff will not apply padding. 
struct gc_history_global { // We may apply other factors after we calculated gen0 budget in @@ -380,6 +345,8 @@ struct gc_history_global int condemned_generation; int gen0_reduction_count; gc_reason reason; + int pause_mode; + DWORD mem_pressure; DWORD global_mechanims_p; void set_mechanism_p (gc_global_mechanism_p mechanism) diff --git a/src/gc/objecthandle.cpp b/src/gc/objecthandle.cpp index b93d90d539..8c309056b1 100644 --- a/src/gc/objecthandle.cpp +++ b/src/gc/objecthandle.cpp @@ -146,9 +146,9 @@ void CALLBACK UpdateDependentHandle(_UNCHECKED_OBJECTREF *pObjRef, LPARAM *pExtr Object **pPrimaryRef = (Object **)pObjRef; Object **pSecondaryRef = (Object **)pExtraInfo; - LOG((LF_GC|LF_ENC, LL_INFO10000, LOG_HANDLE_OBJECT_CLASS("Querying for new location of ", + LOG((LF_GC|LF_ENC, LL_INFO10000, LOG_HANDLE_OBJECT("Querying for new location of ", pPrimaryRef, "to ", *pPrimaryRef))); - LOG((LF_GC|LF_ENC, LL_INFO10000, LOG_HANDLE_OBJECT_CLASS(" and ", + LOG((LF_GC|LF_ENC, LL_INFO10000, LOG_HANDLE_OBJECT(" and ", pSecondaryRef, "to ", *pSecondaryRef))); #ifdef _DEBUG @@ -371,7 +371,7 @@ void CALLBACK UpdatePointer(_UNCHECKED_OBJECTREF *pObjRef, LPARAM *pExtraInfo, L { LIMITED_METHOD_CONTRACT; - LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT_CLASS("Querying for new location of ", pObjRef, "to ", *pObjRef))); + LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT("Querying for new location of ", pObjRef, "to ", *pObjRef))); Object **ppRef = (Object **)pObjRef; @@ -536,7 +536,7 @@ void CALLBACK UpdatePointerPinned(_UNCHECKED_OBJECTREF *pObjRef, LPARAM *pExtraI promote_func* callback = (promote_func*) lp2; callback(ppRef, (ScanContext *)lp1, GC_CALL_PINNED); - LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT_CLASS("Updating ", pObjRef, "to pinned ", *pObjRef))); + LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT("Updating ", pObjRef, "to pinned ", *pObjRef))); } diff --git a/src/inc/corprof.idl b/src/inc/corprof.idl index bb96b03ef0..894a8850be 100644 --- a/src/inc/corprof.idl +++ b/src/inc/corprof.idl @@ -582,6 +582,7 @@ typedef enum COR_PRF_MONITOR_GC | COR_PRF_MONITOR_SUSPENDS | COR_PRF_MONITOR_CLASS_LOADS | + COR_PRF_MONITOR_EXCEPTIONS | COR_PRF_MONITOR_JIT_COMPILATION, // MONITOR_IMMUTABLE represents all flags that may only be set during initialization. 
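For context on how these monitor bits are consumed: a profiler ORs the COR_PRF_MONITOR_* flags it wants into a single mask and hands it to ICorProfilerInfo::SetEventMask. The sketch below is illustrative only (the helper name, and the assumption that pInfo was obtained in ICorProfilerCallback::Initialize, are not part of this patch); it shows the newly listed COR_PRF_MONITOR_EXCEPTIONS bit being requested alongside the other flags from the composite value above.

    #include <windows.h>
    #include "corprof.h"

    // Illustrative helper, not from the patch: request the event groups named in the
    // composite mask above, including the newly added COR_PRF_MONITOR_EXCEPTIONS.
    HRESULT EnableMonitoring(ICorProfilerInfo* pInfo)
    {
        DWORD eventMask = COR_PRF_MONITOR_GC
                        | COR_PRF_MONITOR_SUSPENDS
                        | COR_PRF_MONITOR_CLASS_LOADS
                        | COR_PRF_MONITOR_EXCEPTIONS
                        | COR_PRF_MONITOR_JIT_COMPILATION;
        return pInfo->SetEventMask(eventMask);
    }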
diff --git a/src/inc/eventtrace.h b/src/inc/eventtrace.h index f773fdd1cc..a2daacec60 100644 --- a/src/inc/eventtrace.h +++ b/src/inc/eventtrace.h @@ -231,18 +231,32 @@ namespace ETW // These values are gotten from the gc_reason // in gcimpl.h typedef enum _GC_REASON { - GC_ALLOC_SOH = 0 , - GC_INDUCED = 1 , + GC_ALLOC_SOH = 0, + GC_INDUCED = 1, GC_LOWMEMORY = 2, GC_EMPTY = 3, GC_ALLOC_LOH = 4, GC_OOS_SOH = 5, GC_OOS_LOH = 6, - GC_INDUCED_NOFORCE = 7 + GC_INDUCED_NOFORCE = 7, + GC_GCSTRESS = 8, + GC_LOWMEMORY_BLOCKING = 9, + GC_INDUCED_COMPACTING = 10, + GC_LOWMEMORY_HOST = 11 } GC_REASON; typedef enum _GC_TYPE { - GC_NGC = 0 , GC_BGC = 1 , GC_FGC = 2 + GC_NGC = 0, + GC_BGC = 1, + GC_FGC = 2 } GC_TYPE; + typedef enum _GC_ROOT_KIND { + GC_ROOT_STACK = 0, + GC_ROOT_FQ = 1, + GC_ROOT_HANDLES = 2, + GC_ROOT_OLDER = 3, + GC_ROOT_SIZEDREF = 4, + GC_ROOT_OVERFLOW = 5 + } GC_ROOT_KIND; struct { ULONG Count; ULONG Depth; diff --git a/src/inc/eventtracebase.h b/src/inc/eventtracebase.h index d21353a0f2..517125f49b 100644 --- a/src/inc/eventtracebase.h +++ b/src/inc/eventtracebase.h @@ -450,6 +450,14 @@ namespace ETW friend class ETW::EnumerationLog; #ifdef FEATURE_EVENT_TRACE static VOID SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAllocator *pLoaderAllocatorFilter, DWORD dwEventOptions); + static VOID SendEventsForJitMethodsHelper(BaseDomain *pDomainFilter, + LoaderAllocator *pLoaderAllocatorFilter, + DWORD dwEventOptions, + BOOL fLoadOrDCStart, + BOOL fUnloadOrDCEnd, + BOOL fSendMethodEvent, + BOOL fSendILToNativeMapEvent, + BOOL fGetReJitIDs); static VOID SendEventsForNgenMethods(Module *pModule, DWORD dwEventOptions); static VOID SendMethodJitStartEvent(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL); static VOID SendMethodILToNativeMapEvent(MethodDesc * pMethodDesc, DWORD dwEventOptions, ReJITID rejitID); @@ -874,15 +882,9 @@ McGenEventProviderEnabled( #define ETW_PROVIDER_ENABLED(ProviderSymbol) \ ProviderSymbol##_Context.IsEnabled -#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrInstanceId)\ - MCGEN_ENABLE_CHECK(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, GCPerHeapHistory) ?\ - Etw_GCDataPerHeapSpecial(&GCPerHeapHistory, &GarbageCollectionPrivateId, DataPerHeap, DataSize, ClrInstanceId)\ - : ERROR_SUCCESS\ - #else #define ETW_PROVIDER_ENABLED(ProviderSymbol) TRUE -#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrInstanceId) 0 #endif // FEATURE_EVENT_TRACE diff --git a/src/inc/fxretarget.h b/src/inc/fxretarget.h index 755cba67da..ee2f751f10 100644 --- a/src/inc/fxretarget.h +++ b/src/inc/fxretarget.h @@ -295,6 +295,7 @@ const FrameworkConfig g_arFxPolicy[] = {L"System.Net.NetworkInformation", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported}, {L"System.Net.Primitives", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported}, {L"System.Net.Requests", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported}, + {L"System.Net.WebHeaderCollection", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported}, {L"System.ObjectModel", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported}, {L"System.Reflection", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported}, {L"System.Reflection.Emit", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, 
FxPolicyHelper::AppXBinder_Supported}, diff --git a/src/inc/stdmacros.h b/src/inc/stdmacros.h index 578b19b16d..b410bdda25 100644 --- a/src/inc/stdmacros.h +++ b/src/inc/stdmacros.h @@ -294,7 +294,6 @@ inline ULONG RoundUpToPower2(ULONG x) #define DBG_IPTR_NAME(iptr) \ (iptr) ? "interior" : "base" - #define LOG_HANDLE_OBJECT_CLASS(str1, hnd, str2, obj) \ str1 FMT_HANDLE str2 FMT_OBJECT FMT_CLASS "\n", \ DBG_ADDR(hnd), DBG_ADDR(obj), DBG_CLASS_NAME_OBJ(obj) @@ -308,6 +307,15 @@ inline ULONG RoundUpToPower2(ULONG x) DBG_PIN_NAME(pin), DBG_IPTR_NAME(iptr), \ DBG_ADDR(obj), DBG_CLASS_NAME_IPTR(obj,iptr) +#define LOG_HANDLE_OBJECT(str1, hnd, str2, obj) \ + str1 FMT_HANDLE str2 FMT_OBJECT "\n", \ + DBG_ADDR(hnd), DBG_ADDR(obj) + +#define LOG_PIPTR_OBJECT(obj, pin, iptr) \ + FMT_PIPTR FMT_ADDR "\n", \ + DBG_PIN_NAME(pin), DBG_IPTR_NAME(iptr), \ + DBG_ADDR(obj) + #define UNIQUE_LABEL_DEF(a,x) a##x #define UNIQUE_LABEL_DEF_X(a,x) UNIQUE_LABEL_DEF(a,x) #ifdef _MSC_VER diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp index 8181957f35..409fc64542 100644 --- a/src/jit/assertionprop.cpp +++ b/src/jit/assertionprop.cpp @@ -459,8 +459,8 @@ void Compiler::optAddCopies() tree->gtOp.gtOp1 = newAsgn; tree->gtOp.gtOp2 = copyAsgn; - tree->gtFlags |= ( newAsgn->gtFlags & GTF_GLOB_EFFECT); - tree->gtFlags |= (copyAsgn->gtFlags & GTF_GLOB_EFFECT); + tree->gtFlags |= ( newAsgn->gtFlags & GTF_ALL_EFFECT); + tree->gtFlags |= (copyAsgn->gtFlags & GTF_ALL_EFFECT); } #ifdef DEBUG diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 0762e61da4..70bd108d5b 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -5419,9 +5419,11 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, #else // !CPU_LOAD_STORE_ARCH - // Code size for each instruction. We need this because the + // Code size for each instruction. We need this because the // backward branch is hard-coded with the number of bytes to branch. - + // The encoding differs based on the architecture and what register is + // used (namely, using RAX has a smaller encoding). + // // loop: // For x86 // test [esp + eax], eax 3 @@ -5440,23 +5442,27 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, // sub rbp, 0x1000 7 // cmp rbp, -frameSize 7 // jge loop 2 + getEmitter()->emitIns_R_ARR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0); inst_RV_IV(INS_sub, initReg, CORINFO_PAGE_SIZE, EA_PTRSIZE); inst_RV_IV(INS_cmp, initReg, -((ssize_t)frameSize), EA_PTRSIZE); - int extraBytesForBackJump = 0; + + int bytesForBackwardJump; #ifdef _TARGET_AMD64_ - extraBytesForBackJump = ((initReg == REG_EAX) ? 3 : 5); -#endif // _TARGET_AMD64_ - inst_IV(INS_jge, -15 - extraBytesForBackJump); // Branch backwards to Start of Loop + assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets. + bytesForBackwardJump = ((initReg == REG_EAX) ? 
-18 : -20); +#else // !_TARGET_AMD64_ + assert(initReg == REG_EAX); + bytesForBackwardJump = -15; +#endif // !_TARGET_AMD64_ + + inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop #endif // !CPU_LOAD_STORE_ARCH *pInitRegZeroed = false; // The initReg does not contain zero #ifdef _TARGET_XARCH_ - // The backward branch above depends upon using EAX (and for Amd64 funclets EBP) - assert((initReg == REG_EAX) AMD64_ONLY(|| (initReg == REG_EBP))); - if (pushedStubParam) { // pop eax diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index b6c139e678..63efcf0ffd 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -3550,14 +3550,28 @@ void CodeGen::genCodeForCpObj(GenTreeCpObj* cpObjNode) // src = RSI and dst = RDI. // Either these registers must not contain lclVars, or they must be dying or marked for spill. // This is because these registers are incremented as we go through the struct. - if (srcAddr->gtRegNum == REG_RSI) - { - assert(!genIsRegCandidateLocal(srcAddr) || (srcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) != 0); - } - if (dstAddr->gtRegNum == REG_RDI) - { - assert(!genIsRegCandidateLocal(dstAddr) || (dstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) != 0); - } + GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy(); + GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy(); + unsigned srcLclVarNum = BAD_VAR_NUM; + unsigned dstLclVarNum = BAD_VAR_NUM; + bool isSrcAddrLiveOut = false; + bool isDstAddrLiveOut = false; + if (genIsRegCandidateLocal(actualSrcAddr)) + { + srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum; + isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); + } + if (genIsRegCandidateLocal(actualDstAddr)) + { + dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum; + isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0); + } + assert((actualSrcAddr->gtRegNum != REG_RSI) || + !isSrcAddrLiveOut || + ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut)); + assert((actualDstAddr->gtRegNum != REG_RDI) || + !isDstAddrLiveOut || + ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut)); #endif // DEBUG // Consume these registers. diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 8dc070618e..9a624d830e 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -1968,14 +1968,6 @@ public: // is such an object pointer. bool gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd); - - // Assignment trees which contain an unmanged PInvoke call need to have a simple op1 - // in order to prevent us from have a TYP_BYREF live accross a call to a PInvoke - // If necessary this method will morph such an assignment to honor this restriction - // - GenTreePtr gtCheckReorderAssignmentForUnmanagedCall(GenTreePtr tree); - - //------------------------------------------------------------------------- GenTreePtr gtFoldExpr (GenTreePtr tree); @@ -3873,6 +3865,14 @@ public: return m_switchDescMap; } + // Invalidate the map of unique switch block successors. For example, since the hash key of the map + // depends on block numbers, we must invalidate the map when the blocks are renumbered, to ensure that + // we don't accidentally look up and return the wrong switch data. + void InvalidateUniqueSwitchSuccMap() + { + m_switchDescMap = nullptr; + } + // Requires "switchBlock" to be a block that ends in a switch. Returns // the corresponding SwitchUniqueSuccSet. 
SwitchUniqueSuccSet GetDescriptorForSwitch(BasicBlock* switchBlk); @@ -5048,7 +5048,7 @@ protected : void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to); // Updates the successors of "blk": if "blk2" is a successor of "blk", and there is a mapping for "blk2->blk3" in "redirectMap", - change "blk" so that "blk3" is this successor. + change "blk" so that "blk3" is this successor. Note that the predecessor lists are not updated. void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap); // Marks the containsCall information to "lnum" and any parent loops. diff --git a/src/jit/flowgraph.cpp index 54472600f8..12987cff62 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -2316,16 +2316,28 @@ void Compiler::fgDfsInvPostOrder() // mark in this step. BlockSet_ValRet_T startNodes = fgDomFindStartNodes(); - // Make sure fgFirstBB is still there, even if it participates in a loop. - // Review: it might be better to do this: + // Make sure fgEnterBlks are still there in startNodes, even if they participate in a loop (i.e., there is + // an incoming edge into the block). + assert(fgEnterBlksSetValid); + +#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_) + // // BlockSetOps::UnionD(this, startNodes, fgEnterBlks); - // instead, but this causes problems on ARM, because we for BBJ_CALLFINALLY/BBJ_ALWAYS pairs, we add the BBJ_ALWAYS + // + // This causes problems on ARM because, for BBJ_CALLFINALLY/BBJ_ALWAYS pairs, we add the BBJ_ALWAYS // to the enter blocks set to prevent flow graph optimizations from removing it and creating retless call finallies // (BBF_RETLESS_CALL). This leads to an incorrect DFS ordering in some cases, because we start the recursive walk // from the BBJ_ALWAYS, which is reachable from other blocks. A better solution would be to change ARM to avoid // creating retless calls in a different way, not by adding BBJ_ALWAYS to fgEnterBlks. + // + // So, let us make sure at least fgFirstBB is still there, even if it participates in a loop. BlockSetOps::AddElemD(this, startNodes, 1); assert(fgFirstBB->bbNum == 1); +#else + BlockSetOps::UnionD(this, startNodes, fgEnterBlks); +#endif + + assert(BlockSetOps::IsMember(this, startNodes, fgFirstBB->bbNum)); // Call the recursive helper. unsigned postIndex = 1; @@ -10510,7 +10522,7 @@ void Compiler::fgRemoveBlock(BasicBlock* block, // If we've cached any mappings from switch blocks to SwitchDesc's (which contain only the // *unique* successors of the switch block), invalidate that cache, since an entry in one of // the SwitchDescs might be removed. - m_switchDescMap = NULL; + InvalidateUniqueSwitchSuccMap(); noway_assert((block == fgFirstBB) || (bPrev && (bPrev->bbNext == block))); noway_assert(!(block->bbFlags & BBF_DONT_REMOVE)); @@ -11113,6 +11125,9 @@ bool Compiler::fgRenumberBlocks() if (renumbered || newMaxBBNum) { NewBasicBlockEpoch(); + + // The key in the unique switch successor map is dependent on the block number, so invalidate that cache.
+ InvalidateUniqueSwitchSuccMap(); } else { diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index d37b3bf181..76c9f504a6 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -5881,6 +5881,17 @@ GenTreePtr Compiler::gtCloneExpr(GenTree * tree, } break; + case GT_LEA: + { + GenTreeAddrMode* addrModeOp = tree->AsAddrMode(); + copy = new(this, GT_LEA) GenTreeAddrMode(addrModeOp->TypeGet(), + addrModeOp->Base(), + addrModeOp->Index(), + addrModeOp->gtScale, + addrModeOp->gtOffset); + } + break; + #ifdef FEATURE_SIMD case GT_SIMD: { @@ -9764,32 +9775,41 @@ CHK_OVF: * or overflow - when dividing MIN by -1 */ case GT_DIV: - if (!i2) return tree; - if (UINT32(i1) == 0x80000000 && i2 == -1) + case GT_MOD: + case GT_UDIV: + case GT_UMOD: + if (INT32(i2) == 0) { - /* In IL we have to throw an exception */ + // Division by zero: + // We have to evaluate this expression and throw an exception return tree; } - i1 = INT32(i1) / INT32(i2); break; - - case GT_MOD: - if (!i2) return tree; - if (UINT32(i1) == 0x80000000 && i2 == -1) + else if ((INT32(i2) == -1) && + (UINT32(i1) == 0x80000000)) { - /* In IL we have to throw an exception */ + // Overflow Division: + // We have to evaluate this expression and throw an exception return tree; } - i1 = INT32(i1) % INT32(i2); break; - case GT_UDIV: - if (!i2) return tree; - if (UINT32(i1) == 0x80000000 && i2 == -1) return tree; - i1 = UINT32(i1) / UINT32(i2); break; - - case GT_UMOD: - if (!i2) return tree; - if (UINT32(i1) == 0x80000000 && i2 == -1) return tree; - i1 = UINT32(i1) % UINT32(i2); break; + if (tree->gtOper == GT_DIV) + { + i1 = INT32(i1) / INT32(i2); + } + else if (tree->gtOper == GT_MOD) + { + i1 = INT32(i1) % INT32(i2); + } + else if (tree->gtOper == GT_UDIV) + { + i1 = UINT32(i1) / UINT32(i2); + } + else + { + assert(tree->gtOper == GT_UMOD); + i1 = UINT32(i1) % UINT32(i2); + } + break; default: return tree; @@ -11034,36 +11054,6 @@ bool Compiler::gtHasCatchArg(GenTreePtr tree) return false; } -/***************************************************************************** - * - * Callback that checks for a tree that is a GT_CALL to an umanaged target (PInvoke) - */ - -static Compiler::fgWalkResult gtFindUnmanagedCall(GenTreePtr * pTree, - Compiler::fgWalkData * /* data */) -{ - // If the current node is not a GT_CALL then continue searching... - if ((*pTree)->OperGet() != GT_CALL) - return Compiler::WALK_CONTINUE; - - // If the current call node does not have the GTF_CALL_UNMANAGED flag set then continue searching... - if (((*pTree)->gtFlags & GTF_CALL_UNMANAGED) == 0) - return Compiler::WALK_CONTINUE; - - // We found an unmanaged call site - return Compiler::WALK_ABORT; -} - -/*****************************************************************************/ -bool Compiler::gtHasUnmanagedCall(GenTreePtr tree) -{ - // Does the current subtree contain an unmanaged call? 
- if (fgWalkTreePre(&tree, gtFindUnmanagedCall) == WALK_ABORT) - { - return true; - } - return false; -} //------------------------------------------------------------------------ // gtHasCallOnStack: @@ -11154,189 +11144,6 @@ void Compiler::gtCheckQuirkAddrExposedLclVar(GenTreePtr tree, GenTreeStack* pare #endif } -//------------------------------------------------------------------------ -// gtCheckReorderAssignmentForUnmanagedCall: -// -// Assignment trees which contain an unmanged PInvoke call need to have a simple op1 -// in order to prevent us from have a TYP_BYREF live accross a call to a PInvoke -// This is because we are not allowed to keep a GC pointer in a register accross a -// PInvoke call sit. We cannot update them when/if a GC occurs during the PInvoke call. -// -// Arguments: -// tree - An assignment or assignOp GenTree node, that has not yet been morphed -// -// Output: -// tree - An unchanged tree -// or a mutated tree when we need to evaluate the address for op1 into a temp -// -// We will mutate the assignment tree when op1 has a side-effect that might require us -// to evaluate it before op2 and if the op2 tree contains an unmanaged call site -// -GenTreePtr Compiler::gtCheckReorderAssignmentForUnmanagedCall(GenTreePtr tree) -{ - assert(tree->OperKind() & GTK_ASGOP); - -#if INLINE_NDIRECT - // Does this method have any unmanaged calls? - if (info.compCallUnmanaged != 0) - { - GenTreePtr op1 = tree->gtOp.gtOp1; - GenTreePtr op2 = tree->gtGetOp2(); - var_types asgTyp = op1->TypeGet(); - - // Does op1 have a side-effect that causes us to evaluate it before op2? - // Or, does it contain a BYREF that must not be kept live across an unmanaged call? - if (op1->gtFlags & GTF_ALL_EFFECT) - { - // Does op2 contain an unmanged call? - if (gtHasUnmanagedCall(op2)) - { -/* - +---------+----------+ - tree | GT_ASG | - +---------+----------+ - | - / \ - / \ - / \ - +-----+-----+ +-----+-----+ - op1 | . . . | op2 | . . . | - +-----+-----+ +-----+-----+ - GTF_ALL_EFFECT HasUnmanagedCall - -*/ - // op1 could be a sequence of GT_COMMA nodes - // if it is then we traverse down the op2 side - // until we reach a non comma node - // and we set splice to the last GT_COMMA node that we visited - // - GenTreePtr splice = nullptr; - GenTreePtr op1Val = op1; - while (op1Val->gtOper == GT_COMMA) - { - splice = op1; - op1Val = op1->gtOp.gtOp2; - } - - // Now op1Val is now the actual target of the assignment - // it could be a GT_IND in which case we just remove the GT_IND - // otherwise we take its address by adding a GT_ADDR above it. - - GenTreePtr op1Addr; - var_types addrTyp; - - if (op1Val->gtOper == GT_IND) - { - op1Addr = op1Val->gtOp.gtOp1; - addrTyp = op1Addr->TypeGet(); - - // You cannot have a GT_IND on a TYP_REF - assert(addrTyp != TYP_REF); - } - else - { - addrTyp = TYP_BYREF; - op1Addr = gtNewOperNode(GT_ADDR, addrTyp, op1Val); - } - - // addrTyp is the now type of address that we have. - // - // If we added a GT_ADDR node then we have to assume that we have a TYP_BYREF - // if we had a GT_IND then the child node tells us if we have a TYP_BYREF - // or an TYP_I_IMPL pointer. - // - - // If addrTyp is not a GC type (i.e. 
TYP_BYREF) we can return - // - if (addrTyp != TYP_BYREF) - return tree; // early exit, tree is unmodified - - // DebugCheckFlags can complain later if we have a GTF_GLOB_REF flag here - // - if (op1Val->gtOper == GT_FIELD) - { - // &clsVar doesn't need GTF_GLOB_REF - op1Addr->gtFlags &= ~GTF_GLOB_REF; - } - - // We will transform the tree so that the assignment will not have to - // evaluate op1 and keep it live across the unmanged call in op2 - // - unsigned newTempLclNum = lvaGrabTemp(true DEBUGARG("Force eval op1")); - GenTreePtr asgAddr = gtNewTempAssign(newTempLclNum, op1Addr); - - if (splice != nullptr) - { - GenTreePtr commaVal = op1; - assert(commaVal->gtOper == GT_COMMA); - while (commaVal->gtOper == GT_COMMA) - { - commaVal->gtType = TYP_VOID; - commaVal = commaVal->gtOp.gtOp2; - } - splice->gtOp.gtOp2 = asgAddr; - } - else - { - op1 = asgAddr; - } - - GenTreePtr asgDest = gtNewOperNode(GT_IND, asgTyp, gtNewLclvNode(newTempLclNum, addrTyp)); - - op2 = gtNewOperNode(tree->gtOper, tree->gtType, asgDest, op2); - op2->gtFlags |= GTF_ASG; - - tree->ChangeOper(GT_COMMA); - tree->gtType = TYP_VOID; - tree->gtOp.gtOp1 = op1; - tree->gtOp.gtOp2 = op2; - tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS); - tree->gtFlags |= op1->gtFlags & GTF_ALL_EFFECT; - tree->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT; - -/* - +------+-------+ - asgAddr | GT_ASG | - +------+-------+ - | - / \ - / \ - +-----+-----+ +-----+-----+ - | LclVar | | GT_ADDR | op1Addr - | lclNum | | or GT_ADD | - +-----+-----+ +-----+-----+ - - - +------+-------+ - tree | GT_COMMA* | (Mutated from GT_ASG) - +------+-------+ - | - / \ - / \ - / \ - +-----+-----+ +-----+-----+ - old op1 | GT_COMMA | | GT_ASG | gtNewOperNode - or asgAddr | GT_ASG | +-----+-----+ - +-----+-----+ | - / \ - / \ - +-----+-----+ +-----+-----+ - asgDest | GT_IND | | old op2 | - +-----+-----+ +-----+-----+ - | HasUnmanagedCall - +-----+-----+ - | LclVar | - | lclNum | - +-----+-----+ -*/ - } - } - } -#endif - return tree; -} - - //Checks to see if we're allowed to optimize Type::op_Equality or Type::op_Inequality on this operand. //We're allowed to convert to GT_EQ/GT_NE if one of the operands is: // 1) The result of Object::GetType diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 08afe0da67..d1b2472333 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -608,8 +608,10 @@ public: return gtVNPair.SetConservative(vn); } } - - + void ClearVN() + { + gtVNPair = ValueNumPair(); // Initializes both elements to "NoVN". 
+ } //--------------------------------------------------------------------- // The first set of flags can be used with a large set of nodes, and diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 691db8071a..aa3e4ab311 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -247,6 +247,7 @@ void initRefTypeNames() refTypeNames[RefTypeZeroInit] = "RefTypeZeroInit"; refTypeNames[RefTypeUpperVectorSaveDef] = "RefTypeUpperVectorSaveDef"; refTypeNames[RefTypeUpperVectorSaveUse] = "RefTypeUpperVectorSaveUse"; + refTypeNames[RefTypeKillGCRefs] = "RefTypeKillGCRefs"; shortRefTypeNames[RefTypeInvalid] = "Invl"; shortRefTypeNames[RefTypeDef] = "Def "; @@ -260,6 +261,7 @@ void initRefTypeNames() shortRefTypeNames[RefTypeZeroInit] = "Zero"; shortRefTypeNames[RefTypeUpperVectorSaveDef] = "UVSv"; shortRefTypeNames[RefTypeUpperVectorSaveUse] = "UVRs"; + shortRefTypeNames[RefTypeKillGCRefs] = "KlGC"; } #endif // DEBUG @@ -664,7 +666,7 @@ LinearScan::associateRefPosWithInterval(RefPosition *rp) } else { - assert(rp->refType == RefTypeBB); + assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs)); } } @@ -2628,6 +2630,11 @@ LinearScan::buildKillPositionsForNode(GenTree* tree, } } } + + if (tree->IsCall() && (tree->gtFlags & GTF_CALL_UNMANAGED) != 0) + { + RefPosition * pos = newRefPosition((Interval *)nullptr, currentLoc, RefTypeKillGCRefs, tree, (allRegs(TYP_REF) & ~RBM_ARG_REGS)); + } return true; } @@ -5130,6 +5137,39 @@ void LinearScan::unassignPhysReg( RegRecord * regRec, RefPosition* spillRefPosit } } +//------------------------------------------------------------------------ +// spillGCRefs: Spill any GC-type intervals that are currently in registers.a +// +// Arguments: +// killRefPosition - The RefPosition for the kill +// +// Return Value: +// None. +// +void +LinearScan::spillGCRefs(RefPosition* killRefPosition) +{ + // For each physical register that can hold a GC type, + // if it is occupied by an interval of a GC type, spill that interval. + regMaskTP candidateRegs = killRefPosition->registerAssignment; + while (candidateRegs != RBM_NONE) + { + regMaskTP nextRegBit = genFindLowestBit(candidateRegs); + candidateRegs &= ~nextRegBit; + regNumber nextReg = genRegNumFromMask(nextRegBit); + RegRecord* regRecord = getRegisterRecord(nextReg); + Interval* assignedInterval = regRecord->assignedInterval; + if (assignedInterval == nullptr || + (assignedInterval->isActive == false) || + !varTypeIsGC(assignedInterval->registerType)) + { + continue; + } + unassignPhysReg(regRecord, assignedInterval->recentRefPosition); + } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DONE_KILL_GC_REFS, nullptr, REG_NA, nullptr)); +} + //------------------------------------------------------------------------ // processBlockEndAllocation: Update var locations after 'currentBlock' has been allocated // @@ -5708,7 +5748,7 @@ LinearScan::allocateRegisters() } else { - assert(refType == RefTypeBB); + assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs)); } // For the purposes of register resolution, we handle the DummyDefs before @@ -5764,6 +5804,12 @@ LinearScan::allocateRegisters() continue; } + if (refType == RefTypeKillGCRefs) + { + spillGCRefs(currentRefPosition); + continue; + } + // If this is a FixedReg, disassociate any inactive constant interval from this register. // Otherwise, do nothing. if (refType == RefTypeFixedReg) @@ -7026,6 +7072,10 @@ LinearScan::resolveRegisters() // mismatch. 
assert(getNextBlock() == nullptr || !VarSetOps::IsMember(compiler, getNextBlock()->bbLiveIn, currentRefPosition->getInterval()->getVarIndex(compiler))); + currentRefPosition->referent->recentRefPosition = currentRefPosition; + continue; + case RefTypeKillGCRefs: + // No action to take at resolution time, and no interval to update recentRefPosition for. continue; case RefTypeDummyDef: case RefTypeParamDef: @@ -9279,6 +9329,11 @@ LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, Interval* interval, reg } break; + // Done with GC Kills + case LSRA_EVENT_DONE_KILL_GC_REFS: + printf("DoneKillGC "); + break; + // Block boundaries case LSRA_EVENT_START_BB: assert(currentBlock != nullptr); @@ -9763,12 +9818,19 @@ LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBl } printf(" %s%c%c ", shortRefTypeNames[refPosition->refType], lastUseChar, delayChar); } - else + else if (refPosition->isPhysRegRef) { RegRecord* regRecord = refPosition->getReg(); printf(regNameFormat, getRegName(regRecord->regNum)); printf(" %s ", shortRefTypeNames[refPosition->refType]); } + else + { + assert(refPosition->refType == RefTypeKillGCRefs); + // There's no interval or reg name associated with this. + printf(regNameFormat, " "); + printf(" %s ", shortRefTypeNames[refPosition->refType]); + } } //------------------------------------------------------------------------ @@ -9832,7 +9894,7 @@ LinearScan::verifyFinalAllocation() regRecord->recentRefPosition = currentRefPosition; regNum = regRecord->regNum; } - else + else if (currentRefPosition->isIntervalRef()) { interval = currentRefPosition->getInterval(); interval->recentRefPosition = currentRefPosition; @@ -10065,6 +10127,25 @@ LinearScan::verifyFinalAllocation() } } break; + case RefTypeKillGCRefs: + // No action to take. + // However, we will assert that, at resolution time, no registers contain GC refs. + { + DBEXEC(VERBOSE, printf(" ")); + regMaskTP candidateRegs = currentRefPosition->registerAssignment; + while (candidateRegs != RBM_NONE) + { + regMaskTP nextRegBit = genFindLowestBit(candidateRegs); + candidateRegs &= ~nextRegBit; + regNumber nextReg = genRegNumFromMask(nextRegBit); + RegRecord* regRecord = getRegisterRecord(nextReg); + Interval* assignedInterval = regRecord->assignedInterval; + assert (assignedInterval == nullptr || + !varTypeIsGC(assignedInterval->registerType)); + } + } + break; + case RefTypeExpUse: case RefTypeDummyDef: // Do nothing; these will be handled by the RefTypeBB. 
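The register-mask walk that spillGCRefs and the RefTypeKillGCRefs arm of verifyFinalAllocation both rely on is the usual lowest-set-bit iteration. Below is a standalone sketch of that idiom; the mask type, bit helpers, and register numbers are stand-ins for regMaskTP, genFindLowestBit, and genRegNumFromMask rather than the JIT's own definitions.

    #include <cstdint>
    #include <cstdio>

    using RegMask = std::uint64_t;   // stand-in for regMaskTP

    int main()
    {
        RegMask candidateRegs = 0x2C;   // pretend registers 2, 3 and 5 are candidates
        while (candidateRegs != 0)
        {
            RegMask nextRegBit = candidateRegs & (~candidateRegs + 1);   // isolate the lowest set bit
            candidateRegs &= ~nextRegBit;                                // remove it from the working set
            int regNum = 0;                                              // bit index stands in for the register number
            for (RegMask b = nextRegBit; b > 1; b >>= 1)
            {
                regNum++;
            }
            std::printf("visit register %d\n", regNum);
            // spillGCRefs inspects the RegRecord here and spills the assigned interval
            // only when it is active and holds a GC type.
        }
        return 0;
    }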
diff --git a/src/jit/lsra.h b/src/jit/lsra.h index 6b1d3909aa..e57873fb65 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h @@ -81,6 +81,7 @@ enum RefType : unsigned char RefTypeZeroInit = (0x30 | RefTypeDef), RefTypeUpperVectorSaveDef = (0x40 | RefTypeDef), RefTypeUpperVectorSaveUse = (0x40 | RefTypeUse), + RefTypeKillGCRefs = 0x80, RefTypeBound, }; @@ -722,6 +723,8 @@ private: void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition); + void spillGCRefs(RefPosition* killRefPosition); + /***************************************************************************** * For Resolution phase ****************************************************************************/ @@ -846,6 +849,7 @@ private: LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, + LSRA_EVENT_DONE_KILL_GC_REFS, // Block boundaries LSRA_EVENT_START_BB, @@ -1311,7 +1315,7 @@ public: unsigned rpNum; // The unique RefPosition number, equal to its index in the refPositions list. Only used for debugging dumps. #endif // DEBUG - bool isIntervalRef() { return !isPhysRegRef; } + bool isIntervalRef() { return (!isPhysRegRef && (referent != nullptr)); } // isTrueDef indicates that the RefPosition is a non-update def of a non-internal // interval diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index c21bb2991d..7f5ca56dc6 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -3748,8 +3748,8 @@ void Compiler::fgFixupStructReturn(GenTreePtr call) /***************************************************************************** * * A little helper used to rearrange nested commutative operations. The - * effect is that nested commutative operations are transformed into a - * 'left-deep' tree, i.e. into something like this: + * effect is that nested associative, commutative operations are transformed + * into a 'left-deep' tree, i.e. into something like this: * * (((a op b) op c) op d) op... 
*/ @@ -3758,51 +3758,55 @@ void Compiler::fgFixupStructReturn(GenTreePtr call) void Compiler::fgMoveOpsLeft(GenTreePtr tree) { - GenTreePtr op1 = tree->gtOp.gtOp1; - GenTreePtr op2 = tree->gtOp.gtOp2; - genTreeOps oper = tree->OperGet(); + GenTreePtr op1; + GenTreePtr op2; + genTreeOps oper; - noway_assert(GenTree::OperIsCommutative(oper)); - noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || - oper == GT_AND || oper == GT_MUL); - noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder); - noway_assert(oper == op2->gtOper); + do + { + op1 = tree->gtOp.gtOp1; + op2 = tree->gtOp.gtOp2; + oper = tree->OperGet(); - // Commutativity doesn't hold if overflow checks are needed + noway_assert(GenTree::OperIsCommutative(oper)); + noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || + oper == GT_AND || oper == GT_MUL); + noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder); + noway_assert(oper == op2->gtOper); - if (tree->gtOverflowEx() || op2->gtOverflowEx()) - return; + // Commutativity doesn't hold if overflow checks are needed - if (gtIsActiveCSE_Candidate(op2)) - { - // If we have marked op2 as a CSE candidate, - // we can't perform a commutative reordering - // because any value numbers that we computed for op2 - // will be incorrect after performing a commutative reordering - // - return; - } + if (tree->gtOverflowEx() || op2->gtOverflowEx()) + return; - if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT)) - return; + if (gtIsActiveCSE_Candidate(op2)) + { + // If we have marked op2 as a CSE candidate, + // we can't perform a commutative reordering + // because any value numbers that we computed for op2 + // will be incorrect after performing a commutative reordering + // + return; + } - // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators - if ( ((oper == GT_ADD) || (oper == GT_MUL)) - && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0) ) - { - return; - } + if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT)) + return; - if ( (tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN ) - { - // We could deal with this, but we were always broken and just hit the assert - // below regarding flags, which means it's not frequent, so will just bail out. - // See #195514 - return; - } + // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators + if ( ((oper == GT_ADD) || (oper == GT_MUL)) + && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0) ) + { + return; + } + + if ( (tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN ) + { + // We could deal with this, but we were always broken and just hit the assert + // below regarding flags, which means it's not frequent, so will just bail out. + // See #195514 + return; + } - do - { noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx()); GenTreePtr ad1 = op2->gtOp.gtOp1; @@ -4191,6 +4195,10 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree) // Store information about it. GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int) elemOffs, elemStructType)); + // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it. + + GenTreePtr indTree = tree; + // Did we create a bndsChk tree? 
if (bndsChk) { @@ -4215,13 +4223,30 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree) tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree); } + // Currently we morph the tree to perform some folding operations prior + // to attaching fieldSeq info and labeling constant array index contributions + // fgMorphTree(tree); - if (fgIsCommaThrow(tree)) - return tree; - + // Ideally we just want to proceed to attaching fieldSeq info and labeling the + // constant array index contributions, but the morphing operation may have changed + // the 'tree' into something that now unconditionally throws an exception. + // + // In such case the gtEffectiveVal could be a new tree or it's gtOper could be modified + // or it could be left unchanged. If it is unchanged then we should not return, + // instead we should proceed to attaching fieldSeq info, etc... + // GenTreePtr arrElem = tree->gtEffectiveVal(); + if (fgIsCommaThrow(tree)) + { + if ((arrElem != indTree) || // A new tree node may have been created + (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT + { + return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc.. + } + } + assert(!fgGlobalMorph || (arrElem->gtFlags & GTF_MORPHED)); addr = arrElem->gtOp.gtOp1; @@ -7668,10 +7693,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* ma #if !FEATURE_STACK_FP_X87 tree = fgMorphForRegisterFP(tree); #endif - if (tree->OperKind() & GTK_ASGOP) - { - tree = gtCheckReorderAssignmentForUnmanagedCall(tree); - } } genTreeOps oper = tree->OperGet(); @@ -10920,28 +10941,31 @@ ASG_OP: case GT_XOR: - /* "x ^ -1" is "~x" */ - - if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == -1)) - { - tree->ChangeOper(GT_NOT); - tree->gtOp2 = NULL; - DEBUG_DESTROY_NODE(op2); - } - else if ((op2->gtOper == GT_CNS_LNG) && (op2->gtIntConCommon.LngValue() == -1)) - { - tree->ChangeOper(GT_NOT); - tree->gtOp2 = NULL; - DEBUG_DESTROY_NODE(op2); - } - else if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == 1) && - op1->OperIsCompare()) + if (!optValnumCSE_phase) { - /* "binaryVal ^ 1" is "!binaryVal" */ - gtReverseCond(op1); - DEBUG_DESTROY_NODE(op2); - DEBUG_DESTROY_NODE(tree); - return op1; + /* "x ^ -1" is "~x" */ + + if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == -1)) + { + tree->ChangeOper(GT_NOT); + tree->gtOp2 = NULL; + DEBUG_DESTROY_NODE(op2); + } + else if ((op2->gtOper == GT_CNS_LNG) && (op2->gtIntConCommon.LngValue() == -1)) + { + tree->ChangeOper(GT_NOT); + tree->gtOp2 = NULL; + DEBUG_DESTROY_NODE(op2); + } + else if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == 1) && + op1->OperIsCompare()) + { + /* "binaryVal ^ 1" is "!binaryVal" */ + gtReverseCond(op1); + DEBUG_DESTROY_NODE(op2); + DEBUG_DESTROY_NODE(tree); + return op1; + } } break; @@ -14509,7 +14533,12 @@ Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTr case GT_ADD: assert(axc != AXC_Addr); - if (axc == AXC_Ind) + // See below about treating pointer operations as wider indirection. + if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF) + { + axcStack->Push(AXC_IndWide); + } + else if (axc == AXC_Ind) { // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context. 
// If it's an add of a constant and an address, and the constant represents a field, @@ -14522,16 +14551,55 @@ Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTr } return WALK_CONTINUE; + // !!! Treat Pointer Operations as Wider Indirection + // + // If we are performing pointer operations, make sure we treat that as equivalent to a wider + // indirection. This is because the pointers could be pointing to the address of struct fields + // and could be used to perform operations on the whole struct or passed to another method. + // + // When visiting a node in this pre-order walk, we do not know if we would in the future + // encounter a GT_ADDR of a GT_FIELD below. + // + // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF. + // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a + // wider indirection context down the expr tree. + // + // Example, in unsafe code, + // + // IL_000e 12 00 ldloca.s 0x0 + // IL_0010 7c 02 00 00 04 ldflda 0x4000002 + // IL_0015 12 00 ldloca.s 0x0 + // IL_0017 7c 01 00 00 04 ldflda 0x4000001 + // IL_001c 59 sub + // + // When visiting the GT_SUB node, if the types of either of the GT_SUB's operand are BYREF, then + // consider GT_SUB to be equivalent of an AXC_IndWide. + // + // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat + // them as AXC_IndWide. + // + + // BINOP + case GT_SUB: + case GT_MUL: + case GT_DIV: + case GT_UDIV: + case GT_OR: + case GT_XOR: + case GT_AND: + case GT_LSH: + case GT_RSH: + case GT_RSZ: case GT_EQ: case GT_NE: case GT_LT: case GT_LE: - case GT_GE: case GT_GT: + case GT_GE: + // UNOP case GT_CAST: - if (tree->gtOp.gtOp1->gtType == TYP_BYREF) + if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) || (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF))) { - // if code is trying to convert a byref or compare one, pessimize. axcStack->Push(AXC_IndWide); return WALK_CONTINUE; } diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp index 753b0aaf6e..b51e772c62 100644 --- a/src/jit/optimizer.cpp +++ b/src/jit/optimizer.cpp @@ -1900,7 +1900,10 @@ NO_LOOP: ; continue; // Otherwise... - mod = mod || optCanonicalizeLoopNest(loopInd); + if (optCanonicalizeLoopNest(loopInd)) + { + mod = true; + } } if (mod) { @@ -1954,7 +1957,6 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap) { blk->bbJumpSwt->bbsDstTab[i] = newJumpDest; redirected = true; - } } // If any redirections happend, invalidate the switch table map for the switch. @@ -2002,21 +2004,32 @@ void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to) } } +// Canonicalize the loop nest rooted at parent loop 'loopInd'. +// Returns 'true' if the flow graph is modified. bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd) { - bool res = false; + bool modified = false; + // Is the top of the current loop not in any nested loop? 
if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd) { - res = res || optCanonicalizeLoop(loopInd); + if (optCanonicalizeLoop(loopInd)) + { + modified = true; + } } + for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { - res = res || optCanonicalizeLoopNest(child); + if (optCanonicalizeLoopNest(child)) + { + modified = true; + } } - return res; + + return modified; } bool Compiler::optCanonicalizeLoop(unsigned char loopInd) @@ -2027,6 +2040,9 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd) if (t->bbNatLoopNum == loopInd) return false; + JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to canonicalize\n", + loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum); + // Otherwise, the top of this loop is also part of a nested loop. // // Insert a new unique top for this loop. We must be careful to put this new @@ -2064,6 +2080,29 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd) // ... // BB12 BBJ_ALWAYS => BB30 // + // Another possibility is that the "first" block of the loop nest can be the first block + // of a "try" region that also has other predecessors than those in the loop, or even in + // the "try" region (since blocks can target the first block of a "try" region). For example: + // + // BB08 try { + // ... + // BB10 BBJ_ALWAYS => BB08 + // ... + // BB12 BBJ_ALWAYS => BB08 + // BB13 } + // ... + // BB20 BBJ_ALWAYS => BB08 + // ... + // BB25 BBJ_ALWAYS => BB08 + // + // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop + // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the + // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within + // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the + // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches + // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal + // situation of branching to a non-first block of a 'try' region. + // // We can also have a loop nest where the "first" block is outside of a "try" region // and the back edges are inside a "try" region, for example: // @@ -2106,6 +2145,35 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd) blockMap->Set(t, newT); optRedirectBlock(b, blockMap); + // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue + // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated + // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However, + // they might have been prevented from participating in the loop nest due to different EH nesting, or some + // other reason. + // + // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed + // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times. + // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source + // edge of the blockMap, so nothing will happen. 
+ for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext) + { + BasicBlock* topPredBlock = topPred->flBlock; + + // Skip if topPredBlock is in the loop. + // Note that this uses block number to detect membership in the loop. We are adding blocks during canonicalization, + // and those block numbers will be new, and larger than previous blocks. However, we work outside-in, so we + // shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists. + if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum) + { + JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not redirecting its bottom edge\n", + topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum); + continue; + } + + JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum, newT->bbNum); + optRedirectBlock(topPredBlock, blockMap); + } + assert(newT->bbNext == f); if (f != t) { @@ -4748,6 +4816,8 @@ bool Compiler::optNarrowTree(GenTreePtr tree, if (doit) { tree->gtType = genActualType(dstt); + tree->ClearVN(); + optNarrowTree(op2, srct, dstt, true); // We may also need to cast away the upper bits of op1 if (srcSize == 8) @@ -4781,8 +4851,10 @@ COMMON_BINOP: noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType)); noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType)); - if (!optNarrowTree(op1, srct, dstt, doit) || - !optNarrowTree(op2, srct, dstt, doit)) + if (gtIsActiveCSE_Candidate(op1) || + gtIsActiveCSE_Candidate(op2) || + !optNarrowTree(op1, srct, dstt, doit) || + !optNarrowTree(op2, srct, dstt, doit) ) { noway_assert(doit == false); return false; @@ -4796,6 +4868,7 @@ COMMON_BINOP: tree->gtFlags &= ~GTF_MUL_64RSLT; tree->gtType = genActualType(dstt); + tree->ClearVN(); } return true; @@ -4808,6 +4881,7 @@ NARROW_IND: if (doit && (dstSize <= genTypeSize(tree->gtType))) { tree->gtType = genSignedType(dstt); + tree->ClearVN(); /* Make sure we don't mess up the variable type */ if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD)) @@ -4870,6 +4944,7 @@ NARROW_IND: // The result type of a GT_CAST is never a small type. // Use genActualType to widen dstt when it is a small types. tree->gtType = genActualType(dstt); + tree->ClearVN(); } } @@ -4879,12 +4954,16 @@ NARROW_IND: return false; case GT_COMMA: - if (optNarrowTree(op2, srct, dstt, doit)) + if (!gtIsActiveCSE_Candidate(op2) && + optNarrowTree(op2, srct, dstt, doit)) { /* Simply change the type of the tree */ if (doit) + { tree->gtType = genActualType(dstt); + tree->ClearVN(); + } return true; } return false; diff --git a/src/jit/rangecheck.cpp b/src/jit/rangecheck.cpp index f6b452686f..70e4e96add 100644 --- a/src/jit/rangecheck.cpp +++ b/src/jit/rangecheck.cpp @@ -343,7 +343,11 @@ bool RangeCheck::IsMonotonicallyIncreasing(GenTreePtr expr, SearchPath* path) // If the rhs expr is constant, then it is not part of the dependency // loop which has to increase monotonically. ValueNum vn = expr->gtVNPair.GetConservative(); - if (m_pCompiler->vnStore->IsVNConstant(vn)) + if (path->GetCount() > MAX_SEARCH_DEPTH) + { + return false; + } + else if (m_pCompiler->vnStore->IsVNConstant(vn)) { return true; } @@ -885,10 +889,16 @@ bool RangeCheck::AddOverflows(Limit& limit1, Limit& limit2) // Does the bin operation overflow. 
bool RangeCheck::DoesBinOpOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr op1, GenTreePtr op2, SearchPath* path) { - if (DoesOverflow(block, stmt, op1, path) || DoesOverflow(block, stmt, op2, path)) + if (!path->Lookup(op1) && DoesOverflow(block, stmt, op1, path)) { return true; } + + if (!path->Lookup(op2) && DoesOverflow(block, stmt, op2, path)) + { + return true; + } + // Get the cached ranges of op1 Range* op1Range = nullptr; if (!GetRangeMap()->Lookup(op1, &op1Range)) @@ -983,15 +993,17 @@ bool RangeCheck::ComputeDoesOverflow(BasicBlock* block, GenTreePtr stmt, GenTree JITDUMP("Does overflow %p?\n", dspPtr(expr)); path->Set(expr, block); - noway_assert(path->GetCount() <= MAX_SEARCH_DEPTH); - bool overflows = true; // Remove hashtable entry for expr when we exit the present scope. Range range = Limit(Limit::keUndef); ValueNum vn = expr->gtVNPair.GetConservative(); + if (path->GetCount() > MAX_SEARCH_DEPTH) + { + overflows = true; + } // If the definition chain resolves to a constant, it doesn't overflow. - if (m_pCompiler->vnStore->IsVNConstant(vn)) + else if (m_pCompiler->vnStore->IsVNConstant(vn)) { overflows = false; } @@ -1033,7 +1045,7 @@ struct Node // eg.: merge((0, dep), (dep, dep)) = (0, dep) Range RangeCheck::ComputeRange(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent)) { - bool newlyAdded = path->Set(expr, block); + bool newlyAdded = !path->Set(expr, block); Range range = Limit(Limit::keUndef); ValueNum vn = expr->gtVNPair.GetConservative(); diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp index 8d6a21edf5..59fed64056 100644 --- a/src/jit/simdcodegenxarch.cpp +++ b/src/jit/simdcodegenxarch.cpp @@ -801,8 +801,9 @@ CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) var_types targetType = simdNode->TypeGet(); InstructionSet iset = compiler->getSIMDInstructionSet(); - regNumber op1Reg = genConsumeReg(op1); - regNumber op2Reg = genConsumeReg(op2); + genConsumeOperands(simdNode); + regNumber op1Reg = op1->gtRegNum; + regNumber op2Reg = op2->gtRegNum; regNumber otherReg = op2Reg; // Vector<Int>.Mul: @@ -990,8 +991,9 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) var_types targetType = simdNode->TypeGet(); InstructionSet iset = compiler->getSIMDInstructionSet(); - regNumber op1Reg = genConsumeReg(op1); - regNumber op2Reg = genConsumeReg(op2); + genConsumeOperands(simdNode); + regNumber op1Reg = op1->gtRegNum; + regNumber op2Reg = op2->gtRegNum; regNumber otherReg = op2Reg; switch(simdNode->gtSIMDIntrinsicID) @@ -1211,8 +1213,9 @@ CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) assert(targetType == baseType); assert(varTypeIsFloating(baseType)); - regNumber op1Reg = genConsumeReg(op1); - regNumber op2Reg = genConsumeReg(op2); + genConsumeOperands(simdNode); + regNumber op1Reg = op1->gtRegNum; + regNumber op2Reg = op2->gtRegNum; regNumber tmpReg = REG_NA; // For SSE, or AVX with 32-byte vectors, we need an additional Xmm register as scratch. @@ -1390,7 +1393,8 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) // GetItem has 2 operands: // - the source of SIMD type (op1) // - the index of the value to be returned. - regNumber srcReg = genConsumeReg(op1); + genConsumeOperands(simdNode); + regNumber srcReg = op1->gtRegNum; // SSE2 doesn't have an instruction to implement this intrinsic if the index is not a constant. 
// For the non-constant case, we will use the SIMD temp location to store the vector, and @@ -1403,7 +1407,7 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) noway_assert(simdInitTempVarNum != BAD_VAR_NUM); bool isEBPbased; unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased); - regNumber indexReg = genConsumeReg(op2); + regNumber indexReg = op2->gtRegNum; // Store the vector to the temp location. getEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), @@ -1579,8 +1583,9 @@ CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) assert(op2->TypeGet() == baseType); assert(simdNode->gtSIMDSize >= ((index + 1) * genTypeSize(baseType))); - regNumber op1Reg = genConsumeReg(op1); - regNumber op2Reg = genConsumeReg(op2); + genConsumeOperands(simdNode); + regNumber op1Reg = op1->gtRegNum; + regNumber op2Reg = op2->gtRegNum; // TODO-CQ: For AVX we don't need to do a copy because it supports 3 operands plus immediate. if (targetReg != op1Reg) @@ -1694,17 +1699,7 @@ CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode) assert(genCountBits(treeNode->gtRsvdRegs) == 1); regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); - bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0); - if (!reverseOps) - { - genConsumeReg(addr); - genConsumeReg(data); - } - else - { - genConsumeReg(data); - genConsumeReg(addr); - } + genConsumeOperands(treeNode->AsOp()); // 8-byte write getEmitter()->emitIns_AR_R(ins_Store(TYP_DOUBLE), EA_8BYTE, data->gtRegNum, addr->gtRegNum, 0); diff --git a/src/mscorlib/src/System/Diagnostics/Eventing/FrameworkEventSource.cs b/src/mscorlib/src/System/Diagnostics/Eventing/FrameworkEventSource.cs index a3e1348c79..1851dc3666 100644 --- a/src/mscorlib/src/System/Diagnostics/Eventing/FrameworkEventSource.cs +++ b/src/mscorlib/src/System/Diagnostics/Eventing/FrameworkEventSource.cs @@ -500,15 +500,13 @@ namespace System.Diagnostics.Tracing { [Event(140, Level = EventLevel.Informational, Keywords = Keywords.NetClient, ActivityOptions=EventActivityOptions.Disable, Task = Tasks.GetResponse, Opcode = EventOpcode.Start, Version = 1)] private void GetResponseStart(long id, string uri, bool success, bool synchronous) { - if (IsEnabled()) - WriteEvent(140, id, uri, success, synchronous); + WriteEvent(140, id, uri, success, synchronous); } [Event(141, Level = EventLevel.Informational, Keywords = Keywords.NetClient, ActivityOptions=EventActivityOptions.Disable, Task = Tasks.GetResponse, Opcode = EventOpcode.Stop, Version = 1)] private void GetResponseStop(long id, bool success, bool synchronous, int statusCode) { - if (IsEnabled()) - WriteEvent(141, id, success, synchronous, statusCode); + WriteEvent(141, id, success, synchronous, statusCode); } // In the desktop runtime they don't use Tasks for the point at which the response happens, which means that the @@ -516,35 +514,37 @@ namespace System.Diagnostics.Tracing { [Event(142, Level = EventLevel.Informational, Keywords = Keywords.NetClient, ActivityOptions=EventActivityOptions.Disable, Task = Tasks.GetRequestStream, Opcode = EventOpcode.Start, Version = 1)] private void GetRequestStreamStart(long id, string uri, bool success, bool synchronous) { - if (IsEnabled()) - WriteEvent(142, id, uri, success, synchronous); + WriteEvent(142, id, uri, success, synchronous); } [Event(143, Level = EventLevel.Informational, Keywords = Keywords.NetClient, ActivityOptions=EventActivityOptions.Disable, Task = Tasks.GetRequestStream, Opcode = EventOpcode.Stop, Version = 1)] private 
void GetRequestStreamStop(long id, bool success, bool synchronous) { - if (IsEnabled()) - WriteEvent(143, id, success, synchronous); + WriteEvent(143, id, success, synchronous); } [NonEvent, System.Security.SecuritySafeCritical] public unsafe void BeginGetResponse(object id, string uri, bool success, bool synchronous) { - GetResponseStart((long)*((void**)JitHelpers.UnsafeCastToStackPointer(ref id)), uri, success, synchronous); + if (IsEnabled()) + GetResponseStart(IdForObject(id), uri, success, synchronous); } [NonEvent, System.Security.SecuritySafeCritical] public unsafe void EndGetResponse(object id, bool success, bool synchronous, int statusCode) { - GetResponseStop((long)*((void**)JitHelpers.UnsafeCastToStackPointer(ref id)), success, synchronous, statusCode); + if (IsEnabled()) + GetResponseStop(IdForObject(id), success, synchronous, statusCode); } [NonEvent, System.Security.SecuritySafeCritical] public unsafe void BeginGetRequestStream(object id, string uri, bool success, bool synchronous) { - GetRequestStreamStart((long)*((void**)JitHelpers.UnsafeCastToStackPointer(ref id)), uri, success, synchronous); + if (IsEnabled()) + GetRequestStreamStart(IdForObject(id), uri, success, synchronous); } [NonEvent, System.Security.SecuritySafeCritical] public unsafe void EndGetRequestStream(object id, bool success, bool synchronous) { - GetRequestStreamStop((long)*((void**)JitHelpers.UnsafeCastToStackPointer(ref id)), success, synchronous); + if (IsEnabled()) + GetRequestStreamStop(IdForObject(id), success, synchronous); } // id - represents a correlation ID that allows correlation of two activities, one stamped by @@ -607,6 +607,13 @@ namespace System.Diagnostics.Tracing { ThreadTransferReceive((long) *((void**) JitHelpers.UnsafeCastToStackPointer(ref id)), kind, info); } + // Return a stable ID for an object. We use the hash code, which is not truly unique but is + // close enough for now, at least. We add 0x7FFFFFFF00000000 to it to make it distinguishable + // from the style of ID that simply casts the object reference to a long (since old versions of the + // runtime will emit IDs of that form). + private static long IdForObject(object obj) { + return obj.GetHashCode() + 0x7FFFFFFF00000000; + } } } diff --git a/src/mscorlib/src/System/Environment.cs index b517b58b66..163fd19eda 100644 --- a/src/mscorlib/src/System/Environment.cs +++ b/src/mscorlib/src/System/Environment.cs @@ -1055,7 +1055,12 @@ namespace System { ==============================================================================*/ public static Version Version { get { - return new Version(ThisAssembly.InformationalVersion); + + // Previously this represented the file version of mscorlib.dll. Many other libraries in the framework and outside took dependencies on the first three parts of this version + // remaining constant throughout 4.x. From 4.0 to 4.5.2 this was fine since the file version only incremented the last part. Starting with 4.6 we switched to a file versioning + // scheme that matched the product version. In order to preserve compatibility with existing libraries, this needs to be hard-coded.
+ + return new Version(4,0,30319,42000); } } diff --git a/src/mscorlib/src/System/Globalization/CultureInfo.cs b/src/mscorlib/src/System/Globalization/CultureInfo.cs index 2cdad273d2..3bc804650e 100644 --- a/src/mscorlib/src/System/Globalization/CultureInfo.cs +++ b/src/mscorlib/src/System/Globalization/CultureInfo.cs @@ -287,6 +287,28 @@ namespace System.Globalization { return toReturn; } + + [SecuritySafeCritical] + internal static bool SetCultureInfoForUserPreferredLanguageInAppX(CultureInfo ci) + { + // If running within a compilation process (mscorsvw.exe, for example), it is illegal to + // load any non-mscorlib assembly for execution. Since WindowsRuntimeResourceManager lives + // in System.Runtime.WindowsRuntime, caller will need to fall back to default Win32 value, + // which should be fine because we should only ever need to access FX resources during NGEN. + // FX resources are always loaded from satellite assemblies - even in AppX processes (see the + // comments in code:System.Resources.ResourceManager.SetAppXConfiguration for more details). + if (AppDomain.IsAppXNGen) + { + return false; + } + + if (s_WindowsRuntimeResourceManager == null) + { + s_WindowsRuntimeResourceManager = ResourceManager.GetWinRTResourceManager(); + } + + return s_WindowsRuntimeResourceManager.SetGlobalResourceContextDefaultCulture(ci); + } #endif //////////////////////////////////////////////////////////////////////// @@ -694,7 +716,19 @@ namespace System.Globalization { } set { - Thread.CurrentThread.CurrentCulture = value; +#if FEATURE_APPX + if (value == null) { + throw new ArgumentNullException("value"); + } + + if (AppDomain.IsAppXModel()) { + if (SetCultureInfoForUserPreferredLanguageInAppX(value)) { + // successfully set the culture, otherwise fallback to legacy path + return; + } + } +#endif + Thread.CurrentThread.CurrentCulture = value; } } @@ -780,7 +814,19 @@ namespace System.Globalization { } set { - Thread.CurrentThread.CurrentUICulture = value; +#if FEATURE_APPX + if (value == null) { + throw new ArgumentNullException("value"); + } + + if (AppDomain.IsAppXModel()) { + if (SetCultureInfoForUserPreferredLanguageInAppX(value)) { + // successfully set the culture, otherwise fallback to legacy path + return; + } + } +#endif + Thread.CurrentThread.CurrentUICulture = value; } } diff --git a/src/mscorlib/src/System/Globalization/DateTimeFormat.cs b/src/mscorlib/src/System/Globalization/DateTimeFormat.cs index 58f35bd2ef..82bc4b6f79 100644 --- a/src/mscorlib/src/System/Globalization/DateTimeFormat.cs +++ b/src/mscorlib/src/System/Globalization/DateTimeFormat.cs @@ -929,6 +929,7 @@ namespace System { case Calendar.CAL_HEBREW: case Calendar.CAL_JULIAN: case Calendar.CAL_UMALQURA: + case Calendar.CAL_PERSIAN: timeOnlySpecialCase = true; dtfi = DateTimeFormatInfo.InvariantInfo; break; diff --git a/src/mscorlib/src/System/Globalization/DateTimeParse.cs b/src/mscorlib/src/System/Globalization/DateTimeParse.cs index 1af84a44f6..bbece12b4f 100644 --- a/src/mscorlib/src/System/Globalization/DateTimeParse.cs +++ b/src/mscorlib/src/System/Globalization/DateTimeParse.cs @@ -3683,14 +3683,21 @@ new DS[] { DS.ERROR, DS.TX_NNN, DS.TX_NNN, DS.TX_NNN, DS.ERROR, DS.ERROR, // Otherwise it is unspecified and we consume no characters break; case ':': - if (!str.Match(dtfi.TimeSeparator)) { + // We match the separator in time pattern with the character in the time string if both equal to ':' or the date separator is matching the characters in the date string + // We have to exclude the case when the time separator is 
more than one character and starts with ':' something like "::" for instance. + if (((dtfi.TimeSeparator.Length > 1 && dtfi.TimeSeparator[0] == ':') || !str.Match(':')) && + !str.Match(dtfi.TimeSeparator)) { // A time separator is expected. result.SetFailure(ParseFailureKind.Format, "Format_BadDateTime", null); return false; } break; case '/': - if (!str.Match(dtfi.DateSeparator)) { + // We match the separator in date pattern with the character in the date string if both equal to '/' or the date separator is matching the characters in the date string + // We have to exclude the case when the date separator is more than one character and starts with '/' something like "//" for instance. + if (((dtfi.DateSeparator.Length > 1 && dtfi.DateSeparator[0] == '/') || !str.Match('/')) && + !str.Match(dtfi.DateSeparator)) + { // A date separator is expected. result.SetFailure(ParseFailureKind.Format, "Format_BadDateTime", null); return false; diff --git a/src/mscorlib/src/System/Resources/ResourceManager.cs b/src/mscorlib/src/System/Resources/ResourceManager.cs index f55b55482a..132d854dd3 100644 --- a/src/mscorlib/src/System/Resources/ResourceManager.cs +++ b/src/mscorlib/src/System/Resources/ResourceManager.cs @@ -38,7 +38,10 @@ namespace System.Resources { // // This is implemented in System.Runtime.WindowsRuntime as function System.Resources.WindowsRuntimeResourceManager, // allowing us to ask for a WinRT-specific ResourceManager. - // Ideally this would be an interface, or at least an abstract class - but neither seems to play nice with FriendAccessAllowed. + // It is important to have WindowsRuntimeResourceManagerBase as regular class with virtual methods and default implementations. + // Defining WindowsRuntimeResourceManagerBase as abstract class or interface will cause issues when adding more methods to it + // because itll create dependency between mscorlib and System.Runtime.WindowsRuntime which will require always shipping both DLLs together. + // Also using interface or abstract class will not play nice with FriendAccessAllowed. // [FriendAccessAllowed] [SecurityCritical] @@ -54,6 +57,9 @@ namespace System.Resources { [SecurityCritical] get { return null; } } + + [SecurityCritical] + public virtual bool SetGlobalResourceContextDefaultCulture(CultureInfo ci) { return false; } } [FriendAccessAllowed] diff --git a/src/mscorlib/src/System/Runtime/CompilerServices/TaskAwaiter.cs b/src/mscorlib/src/System/Runtime/CompilerServices/TaskAwaiter.cs index 18c6335d1e..7b58dbe7ff 100644 --- a/src/mscorlib/src/System/Runtime/CompilerServices/TaskAwaiter.cs +++ b/src/mscorlib/src/System/Runtime/CompilerServices/TaskAwaiter.cs @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft. All rights reserved. +// Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. 
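The separator handling added to DateTimeParse above boils down to: accept the literal ':' (or '/') unless the culture's separator is a multi-character string that itself begins with that literal, and otherwise fall back to matching the culture-specific separator. Below is a simplified standalone sketch of that rule for the time case; the helper name and the explicit position handling are illustrative stand-ins for the real parser's __DTString.Match.

static bool MatchTimeSeparator(string input, ref int pos, string timeSeparator)
{
    // Accept a literal ':' unless the culture separator is a multi-character
    // string that itself starts with ':' (e.g. "::"), which would make the
    // literal match ambiguous.
    bool literalAllowed = !(timeSeparator.Length > 1 && timeSeparator[0] == ':');
    if (literalAllowed && pos < input.Length && input[pos] == ':')
    {
        pos++;
        return true;
    }

    // Otherwise require the culture-specific time separator.
    if (pos + timeSeparator.Length <= input.Length &&
        string.CompareOrdinal(input, pos, timeSeparator, 0, timeSeparator.Length) == 0)
    {
        pos += timeSeparator.Length;
        return true;
    }

    return false;
}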
// =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ diff --git a/src/mscorlib/src/System/Security/Permissions/FileIOPermission.cs b/src/mscorlib/src/System/Security/Permissions/FileIOPermission.cs index 09d34779f4..ac9f4d4f89 100644 --- a/src/mscorlib/src/System/Security/Permissions/FileIOPermission.cs +++ b/src/mscorlib/src/System/Security/Permissions/FileIOPermission.cs @@ -967,7 +967,6 @@ namespace System.Security.Permissions { } } } - } [Serializable] diff --git a/src/mscorlib/src/System/String.cs b/src/mscorlib/src/System/String.cs index d9f40899b6..9d4dcde887 100644 --- a/src/mscorlib/src/System/String.cs +++ b/src/mscorlib/src/System/String.cs @@ -761,6 +761,8 @@ namespace System { } #if FEATURE_RANDOMIZED_STRING_HASHING + // Do not remove! + // This method is called by reflection in System.Xml [System.Security.SecurityCritical] [MethodImplAttribute(MethodImplOptions.InternalCall)] internal static extern int InternalMarvin32HashString(string s, int strLen, long additionalEntropy); diff --git a/src/mscorlib/src/System/Threading/ExecutionContext.cs b/src/mscorlib/src/System/Threading/ExecutionContext.cs index 3eb895cb4c..3fc1e14908 100644 --- a/src/mscorlib/src/System/Threading/ExecutionContext.cs +++ b/src/mscorlib/src/System/Threading/ExecutionContext.cs @@ -1123,6 +1123,8 @@ namespace System.Threading ec.IllogicalCallContext = (IllogicalCallContext)this.IllogicalCallContext.CreateCopy(); #endif // #if FEATURE_REMOTING + ec._localValues = this._localValues; + ec._localChangeNotifications = this._localChangeNotifications; ec.isFlowSuppressed = this.isFlowSuppressed; return ec; diff --git a/src/mscorlib/src/System/Threading/Tasks/Task.cs b/src/mscorlib/src/System/Threading/Tasks/Task.cs index 2654e1ba79..ebd8c2b614 100644 --- a/src/mscorlib/src/System/Threading/Tasks/Task.cs +++ b/src/mscorlib/src/System/Threading/Tasks/Task.cs @@ -201,7 +201,8 @@ namespace System.Threading.Tasks // Values for ContingentProperties.m_internalCancellationRequested. private const int CANCELLATION_REQUESTED = 0x1; - // Can be null, a single continuation, a list of continuations, or s_taskCompletionSentinel. + // Can be null, a single continuation, a list of continuations, or s_taskCompletionSentinel, + // in that order. The logic arround this object assumes it will never regress to a previous state. private volatile object m_continuationObject = null; // m_continuationObject is set to this when the task completes. @@ -4737,23 +4738,12 @@ namespace System.Threading.Tasks // continuation. if (m_continuationObject != s_taskCompletionSentinel) { - // Before growing the list, we look to see whether any continuations in the list - // have already completed and thus can be removed. This helps to avoid both unnecessary - // growth to the list, but more importantly it helps to avoid temporary leaks when this - // task is long-lived and a registered continuation has already completed yet is still - // being kept alive. This can happen in the case of a cancelable continuation. 
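The new comment on m_continuationObject earlier in this hunk describes a single field that only ever moves forward: null, then a single continuation, then a List<object>, then s_taskCompletionSentinel. A minimal standalone sketch of that forward-only pattern built on Interlocked.CompareExchange is shown here; the class and member names are illustrative and this is not the actual Task implementation.

using System.Collections.Generic;
using System.Threading;

// Forward-only state machine: null -> single continuation -> List<object> -> sentinel.
class ContinuationSlot
{
    private static readonly object s_completionSentinel = new object();
    private volatile object _state; // null, a single continuation, a List<object>, or the sentinel

    public bool TryAdd(object continuation)
    {
        while (true)
        {
            object observed = _state;
            if (observed == s_completionSentinel)
                return false; // already completed; the caller runs the continuation inline

            if (observed == null)
            {
                // null -> single
                if (Interlocked.CompareExchange(ref _state, continuation, null) == null)
                    return true;
                continue; // lost a race; re-read and retry
            }

            List<object> list = observed as List<object>;
            if (list == null)
            {
                // single -> list (preserve the existing continuation as the first element)
                Interlocked.CompareExchange(ref _state, new List<object> { observed }, observed);
                continue; // re-read; either we or a racing thread installed the list
            }

            lock (list)
            {
                if (_state != list)
                    continue; // state moved to the sentinel while we waited for the lock
                list.Add(continuation);
                return true;
            }
        }
    }

    // list/single/null -> sentinel; the state never moves backwards.
    public object Complete()
    {
        return Interlocked.Exchange(ref _state, s_completionSentinel);
    }
}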
+ // Before growing the list we remove possible null entries that result + // from RemoveContinuation(). if (list.Count == list.Capacity) { - for (int index = list.Count - 1; index >= 0; index--) - { - StandardTaskContinuation cont = list[index] as StandardTaskContinuation; - if (cont != null && cont.m_task.IsCanceled) - { - list.RemoveAt(index); - } - } + list.RemoveAll(s_IsTaskContinuationNullPredicate); } - if (addBeforeOthers) list.Insert(0, tc); @@ -4792,17 +4782,37 @@ namespace System.Threading.Tasks // Removes a continuation task from m_continuations internal void RemoveContinuation(object continuationObject) // could be TaskContinuation or Action<Task> { - // We need to snap a local reference to m_continuations - // because we could be racing w/ FinishContinuations() which nulls out m_continuationObject in the end - List<object> continuationsLocalRef = m_continuationObject as List<object>; + // We need to snap a local reference to m_continuationObject since re-reading the volatile field is more costly, + // and to prevent the value from changing underneath us as a result of a race with another method. + object continuationsLocalRef = m_continuationObject; + + // Task is completed. Nothing to do here. + if (continuationsLocalRef == s_taskCompletionSentinel) return; + + List<object> continuationsLocalListRef = continuationsLocalRef as List<object>; - // If continuationsLocalRef == null, it means that m_continuationObject is not a list. We only - // remove continuations if there are a list of them. We take no action if there are no continuations - // (m_continuationObject == null), if we are tracking a single continuation (m_continuationObject is - // TaskContinuation or Action<Task>), or if the task is complete (m_continuationObject is s_taskCompletionSentinel). - if (continuationsLocalRef != null) + if (continuationsLocalListRef == null) { - lock (continuationsLocalRef) + // This is not a list. If we have a single object (the one we want to remove) we try to replace it with an empty list. + // Note we cannot go back to a null state, since it would mess up the AddTaskContinuation logic. + if (Interlocked.CompareExchange(ref m_continuationObject, new List<object>(), continuationObject) != continuationObject) + { + // If we fail it means that either AddContinuationComplex won the race and m_continuationObject is now a List + // that contains the element we want to remove, or FinishContinuations set s_taskCompletionSentinel. + // So we should try to get the list one more time. + continuationsLocalListRef = m_continuationObject as List<object>; + } + else + { + // Exchange was successful so we can skip the last comparison. + return; + } + } + + // If continuationsLocalListRef is still null it means s_taskCompletionSentinel has been set already and there is nothing else to do. + if (continuationsLocalListRef != null) + { + lock (continuationsLocalListRef) { // There is a small chance that this task completed since we took a local snapshot into // continuationsLocalRef.
In that case, just return; we don't want to be manipulating the @@ -4810,25 +4820,19 @@ namespace System.Threading.Tasks if (m_continuationObject == s_taskCompletionSentinel) return; // Find continuationObject in the continuation list - int index = continuationsLocalRef.IndexOf(continuationObject); + int index = continuationsLocalListRef.IndexOf(continuationObject); if (index != -1) { // null out that TaskContinuation entry, which will be interpreted as "to be cleaned up" - continuationsLocalRef[index] = null; + continuationsLocalListRef[index] = null; - // if the list of continuations is large enough it's time to compact it by removing - // all entries marked for clean up - if (continuationsLocalRef.Count > 128) - { - continuationsLocalRef.RemoveAll(s_IsTaskContinuationNullPredicate); // RemoveAll has better performance than doing it ourselves - } } } } } - // statically allocated delegate for the RemoveAll expression in RemoveContinuations() + // statically allocated delegate for the RemoveAll expression in RemoveContinuations() and AddContinuationComplex() private readonly static Predicate<object> s_IsTaskContinuationNullPredicate = new Predicate<object>((tc) => { return (tc == null); }); diff --git a/src/tools/crossgen/crossgen.nativeproj b/src/tools/crossgen/crossgen.nativeproj index da37792489..5377194c76 100644 --- a/src/tools/crossgen/crossgen.nativeproj +++ b/src/tools/crossgen/crossgen.nativeproj @@ -41,9 +41,15 @@ <ProjectReference>$(ClrSrcDirectory)zap\crossgen\zap_crossgen.nativeproj</ProjectReference> </TargetLib> - <TargetLib Condition="'$(MDILGenerator)' != 'true'" Include="$(ClrLibPath)\jit_crossgen.lib"> - <ProjectReference Condition="'$(_BuildArch)' != 'amd64' and '$(_BuildArch)' != 'arm64'">$(ClrSrcDirectory)jit32\crossgen\jit_crossgen.nativeproj</ProjectReference> + <!-- In the CodeGen branch, we use RyuJIT for all JIT builds --> + <TargetLib Condition="'$(MDILGenerator)' != 'true' and '$(_BuildBranch)' == 'CodeGen'" Include="$(ClrLibPath)\jit_crossgen.lib"> + <ProjectReference>$(ClrSrcDirectory)jit\crossgen\jit_crossgen.nativeproj</ProjectReference> + </TargetLib> + + <!-- In other branches, we build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 --> + <TargetLib Condition="'$(MDILGenerator)' != 'true' and '$(_BuildBranch)' != 'CodeGen'" Include="$(ClrLibPath)\jit_crossgen.lib"> <ProjectReference Condition="'$(_BuildArch)' == 'amd64' or '$(_BuildArch)' == 'arm64'">$(ClrSrcDirectory)jit\crossgen\jit_crossgen.nativeproj</ProjectReference> + <ProjectReference Condition="'$(_BuildArch)' != 'amd64' and '$(_BuildArch)' != 'arm64'">$(ClrSrcDirectory)jit32\crossgen\jit_crossgen.nativeproj</ProjectReference> </TargetLib> <TargetLib Condition="'$(MDILGenerator)' == 'true'" Include="$(ClrLibPath)\jitmdil_crossgen.lib"> diff --git a/src/vm/ClrEtwAll.man b/src/vm/ClrEtwAll.man index 944f8985d6..ea3c4da935 100644 --- a/src/vm/ClrEtwAll.man +++ b/src/vm/ClrEtwAll.man @@ -114,6 +114,10 @@ <opcode name="GCBulkRootStaticVar" message="$(string.RuntimePublisher.GCBulkRootStaticVarOpcodeMessage)" symbol="CLR_GC_BULKROOTSTATICVAR_OPCODE" value="40"> </opcode> <opcode name="IncreaseMemoryPressure" message="$(string.RuntimePublisher.IncreaseMemoryPressureOpcodeMessage)" symbol="CLR_GC_INCREASEMEMORYPRESSURE_OPCODE" value="200"> </opcode> <opcode name="DecreaseMemoryPressure" message="$(string.RuntimePublisher.DecreaseMemoryPressureOpcodeMessage)" symbol="CLR_GC_DECREASEMEMORYPRESSURE_OPCODE" value="201"> </opcode> + <opcode name="GCMarkWithType" 
message="$(string.RuntimePublisher.GCMarkOpcodeMessage)" symbol="CLR_GC_MARK_OPCODE" value="202"> </opcode> + <opcode name="GCJoin" message="$(string.RuntimePublisher.GCJoinOpcodeMessage)" symbol="CLR_GC_JOIN_OPCODE" value="203"> </opcode> + <opcode name="GCPerHeapHistory" message="$(string.RuntimePublisher.GCPerHeapHistoryOpcodeMessage)" symbol="CLR_GC_GCPERHEAPHISTORY_OPCODE" value="204"> </opcode> + <opcode name="GCGlobalHeapHistory" message="$(string.RuntimePublisher.GCGlobalHeapHistoryOpcodeMessage)" symbol="CLR_GC_GCGLOBALHEAPHISTORY_OPCODE" value="205"> </opcode> </opcodes> </task> @@ -398,7 +402,9 @@ <map value="0" message="$(string.RuntimePublisher.GCRootKind.Stack)"/> <map value="1" message="$(string.RuntimePublisher.GCRootKind.Finalizer)"/> <map value="2" message="$(string.RuntimePublisher.GCRootKind.Handle)"/> - <map value="3" message="$(string.RuntimePublisher.GCRootKind.Other)"/> + <map value="3" message="$(string.RuntimePublisher.GCRootKind.Older)"/> + <map value="4" message="$(string.RuntimePublisher.GCRootKind.SizedRef)"/> + <map value="5" message="$(string.RuntimePublisher.GCRootKind.Overflow)"/> </valueMap> <valueMap name="GCHandleKindMap"> <map value="0x0" message="$(string.RuntimePublisher.GCHandleKind.WeakShortMessage)"/> @@ -962,6 +968,115 @@ </UserData> </template> + <template tid="GCMarkWithType"> + <data name="HeapNum" inType="win:UInt32" /> + <data name="ClrInstanceID" inType="win:UInt16" /> + <data name="Type" inType="win:UInt32" map="GCRootKindMap" /> + <data name="Bytes" inType="win:UInt64" /> + + <UserData> + <GCMarkWithType xmlns="myNs"> + <HeapNum> %1 </HeapNum> + <ClrInstanceID> %2 </ClrInstanceID> + <Type> %3 </Type> + <Bytes> %4 </Bytes> + </GCMarkWithType> + </UserData> + </template> + + <template tid="GCJoin_V2"> + <data name="Heap" inType="win:UInt32" /> + <data name="JoinTime" inType="win:UInt32" /> + <data name="JoinType" inType="win:UInt32" /> + <data name="ClrInstanceID" inType="win:UInt16" /> + <data name="JoinID" inType="win:UInt32" /> + + <UserData> + <GCJoin_V2 xmlns="myNs"> + <Heap> %1 </Heap> + <JoinTime> %2 </JoinTime> + <JoinType> %3 </JoinType> + <ClrInstanceID> %4 </ClrInstanceID> + <JoinID> %5 </JoinID> + </GCJoin_V2> + </UserData> + </template> + + <template tid="GCPerHeapHistory_V3"> + <data name="ClrInstanceID" inType="win:UInt16" /> + <data name="FreeListAllocated" inType="win:Pointer" outType="win:HexInt64" /> + <data name="FreeListRejected" inType="win:Pointer" outType="win:HexInt64" /> + <data name="EndOfSegAllocated" inType="win:Pointer" outType="win:HexInt64" /> + <data name="CondemnedAllocated" inType="win:Pointer" outType="win:HexInt64" /> + <data name="PinnedAllocated" inType="win:Pointer" outType="win:HexInt64" /> + <data name="PinnedAllocatedAdvance" inType="win:Pointer" outType="win:HexInt64" /> + <data name="RunningFreeListEfficiency" inType="win:UInt32" /> + <data name="CondemnReasons0" inType="win:UInt32" /> + <data name="CondemnReasons1" inType="win:UInt32" /> + <data name="CompactMechanisms" inType="win:UInt32" /> + <data name="ExpandMechanisms" inType="win:UInt32" /> + <data name="HeapIndex" inType="win:UInt32" /> + <data name="ExtraGen0Commit" inType="win:Pointer" outType="win:HexInt64" /> + <data name="Count" inType="win:UInt32" /> + <struct name="Values" count="Count" > + <data name="SizeBefore" inType="win:Pointer" outType="win:HexInt64" /> + <data name="FreeListBefore" inType="win:Pointer" outType="win:HexInt64" /> + <data name="FreeObjBefore" inType="win:Pointer" outType="win:HexInt64" /> + <data 
name="SizeAfter" inType="win:Pointer" outType="win:HexInt64" /> + <data name="FreeListAfter" inType="win:Pointer" outType="win:HexInt64" /> + <data name="FreeObjAfter" inType="win:Pointer" outType="win:HexInt64" /> + <data name="In" inType="win:Pointer" outType="win:HexInt64" /> + <data name="PinnedSurv" inType="win:Pointer" outType="win:HexInt64" /> + <data name="NonePinnedSurv" inType="win:Pointer" outType="win:HexInt64" /> + <data name="NewAllocation" inType="win:Pointer" outType="win:HexInt64" /> + </struct> + <UserData> + <GCPerHeapHistory_V3 xmlns="myNs"> + <ClrInstanceID> %1 </ClrInstanceID> + <FreeListAllocated> %2 </FreeListAllocated> + <FreeListRejected> %3 </FreeListRejected> + <EndOfSegAllocated> %4 </EndOfSegAllocated> + <CondemnedAllocated> %5 </CondemnedAllocated> + <PinnedAllocated> %6 </PinnedAllocated> + <PinnedAllocatedAdvance> %7 </PinnedAllocatedAdvance> + <RunningFreeListEfficiency> %8 </RunningFreeListEfficiency> + <CondemnReasons0> %9 </CondemnReasons0> + <CondemnReasons1> %10 </CondemnReasons1> + <CompactMechanisms> %11 </CompactMechanisms> + <ExpandMechanisms> %12 </ExpandMechanisms> + <HeapIndex> %13 </HeapIndex> + <ExtraGen0Commit> %14 </ExtraGen0Commit> + <Count> %15 </Count> + </GCPerHeapHistory_V3> + </UserData> + </template> + + <template tid="GCGlobalHeap_V2"> + <data name="FinalYoungestDesired" inType="win:UInt64" outType="win:HexInt64" /> + <data name="NumHeaps" inType="win:Int32" /> + <data name="CondemnedGeneration" inType="win:UInt32" /> + <data name="Gen0ReductionCount" inType="win:UInt32" /> + <data name="Reason" inType="win:UInt32" /> + <data name="GlobalMechanisms" inType="win:UInt32" /> + <data name="ClrInstanceID" inType="win:UInt16" /> + <data name="PauseMode" inType="win:UInt32" /> + <data name="MemoryPressure" inType="win:UInt32" /> + + <UserData> + <GCGlobalHeap_V2 xmlns="myNs"> + <FinalYoungestDesired> %1 </FinalYoungestDesired> + <NumHeaps> %2 </NumHeaps> + <CondemnedGeneration> %3 </CondemnedGeneration> + <Gen0ReductionCount> %4 </Gen0ReductionCount> + <Reason> %5 </Reason> + <GlobalMechanisms> %6 </GlobalMechanisms> + <ClrInstanceID> %7 </ClrInstanceID> + <PauseMode> %8 </PauseMode> + <MemoryPressure> %9 </MemoryPressure> + </GCGlobalHeap_V2> + </UserData> + </template> + <template tid="FinalizeObject"> <data name="TypeID" inType="win:Pointer" /> <data name="ObjectID" inType="win:Pointer" /> @@ -2297,7 +2412,7 @@ symbol="GCBulkMovedObjectRanges" message="$(string.RuntimePublisher.GCBulkMovedObjectRangesEventMessage)"/> <event value="23" version="0" level="win:Informational" template="GCGenerationRange" - keywords ="GCKeyword" opcode="GCGenerationRange" + keywords ="GCHeapSurvivalAndMovementKeyword" opcode="GCGenerationRange" task="GarbageCollection" symbol="GCGenerationRange" message="$(string.RuntimePublisher.GCGenerationRangeEventMessage)"/> @@ -2895,6 +3010,26 @@ task="GarbageCollection" symbol="DecreaseMemoryPressure" message="$(string.RuntimePublisher.DecreaseMemoryPressureEventMessage)"/> + <event value="202" version="0" level="win:Informational" template="GCMarkWithType" + keywords ="GCKeyword" opcode="GCMarkWithType" + task="GarbageCollection" + symbol="GCMarkWithType" message="$(string.RuntimePublisher.GCMarkWithTypeEventMessage)"/> + + <event value="203" version="2" level="win:Verbose" template="GCJoin_V2" + keywords ="GCKeyword" opcode="GCJoin" + task="GarbageCollection" + symbol="GCJoin_V2" message="$(string.RuntimePublisher.GCJoin_V2EventMessage)"/> + + <event value="204" version="3" level="win:Informational" 
template="GCPerHeapHistory_V3" + keywords ="GCKeyword" opcode="GCPerHeapHistory" + task="GarbageCollection" + symbol="GCPerHeapHistory_V3" message="$(string.RuntimePublisher.GCPerHeapHistory_V3EventMessage)"/> + + <event value="205" version="2" level="win:Informational" template="GCGlobalHeap_V2" + keywords ="GCKeyword" opcode="GCGlobalHeapHistory" + task="GarbageCollection" + symbol="GCGlobalHeapHistory_V2" message="$(string.RuntimePublisher.GCGlobalHeap_V2EventMessage)"/> + <!-- CLR Debugger events 240-249 --> <event value="240" version="0" level="win:Informational" keywords="DebuggerKeyword" opcode="win:Start" @@ -5945,6 +6080,10 @@ <string id="RuntimePublisher.GCMarkFinalizeQueueRootsEventMessage" value="HeapNum=%1;%nClrInstanceID=%2"/> <string id="RuntimePublisher.GCMarkHandlesEventMessage" value="HeapNum=%1;%nClrInstanceID=%2"/> <string id="RuntimePublisher.GCMarkOlderGenerationRootsEventMessage" value="HeapNum=%1;%nClrInstanceID=%2"/> + <string id="RuntimePublisher.GCMarkWithTypeEventMessage" value="HeapNum=%1;%nClrInstanceID=%2;%nType=%3;%nBytes=%4"/> + <string id="RuntimePublisher.GCJoin_V2EventMessage" value="Heap=%1;%nJoinTime=%2;%nJoinType=%3;%nClrInstanceID=%4;%nJoinID=%5"/> + <string id="RuntimePublisher.GCPerHeapHistory_V3EventMessage" value="ClrInstanceID=%1;%nFreeListAllocated=%2;%nFreeListRejected=%3;%nEndOfSegAllocated=%4;%nCondemnedAllocated=%5;%nPinnedAllocated=%6;%nPinnedAllocatedAdvance=%7;%RunningFreeListEfficiency=%8;%nCondemnReasons0=%9;%nCondemnReasons1=%10;%nCompactMechanisms=%11;%nExpandMechanisms=%12;%nHeapIndex=%13;%nExtraGen0Commit=%14;%nCount=%15"/> + <string id="RuntimePublisher.GCGlobalHeap_V2EventMessage" value="FinalYoungestDesired=%1;%nNumHeaps=%2;%nCondemnedGeneration=%3;%nGen0ReductionCountD=%4;%nReason=%5;%nGlobalMechanisms=%6;%nClrInstanceID=%7;%nPauseMode=%8;%nMemoryPressure=%9"/> <string id="RuntimePublisher.FinalizeObjectEventMessage" value="TypeID=%1;%nObjectID=%2;%nClrInstanceID=%3" /> <string id="RuntimePublisher.GCTriggeredEventMessage" value="Reason=%1" /> <string id="RuntimePublisher.PinObjectAtGCTimeEventMessage" value="HandleID=%1;%nObjectID=%2;%nObjectSize=%3;%nTypeName=%4;%n;%nClrInstanceID=%5" /> @@ -6296,7 +6435,9 @@ <string id="RuntimePublisher.GCRootKind.Stack" value="Stack" /> <string id="RuntimePublisher.GCRootKind.Finalizer" value="Finalizer" /> <string id="RuntimePublisher.GCRootKind.Handle" value="Handle" /> - <string id="RuntimePublisher.GCRootKind.Other" value="Other" /> + <string id="RuntimePublisher.GCRootKind.Older" value="Older" /> + <string id="RuntimePublisher.GCRootKind.SizedRef" value="SizedRef" /> + <string id="RuntimePublisher.GCRootKind.Overflow" value="Overflow" /> <string id="RuntimePublisher.Startup.CONCURRENT_GCMapMessage" value="CONCURRENT_GC" /> <string id="RuntimePublisher.Startup.LOADER_OPTIMIZATION_SINGLE_DOMAINMapMessage" value="LOADER_OPTIMIZATION_SINGLE_DOMAIN" /> <string id="RuntimePublisher.Startup.LOADER_OPTIMIZATION_MULTI_DOMAINMapMessage" value="LOADER_OPTIMIZATION_MULTI_DOMAIN" /> @@ -6523,6 +6664,10 @@ <string id="RuntimePublisher.GCMarkHandlesOpcodeMessage" value="MarkHandles" /> <string id="RuntimePublisher.GCMarkFinalizeQueueRootsOpcodeMessage" value="MarkFinalizeQueueRoots" /> <string id="RuntimePublisher.GCMarkOlderGenerationRootsOpcodeMessage" value="MarkCards" /> + <string id="RuntimePublisher.GCMarkOpcodeMessage" value="Mark" /> + <string id="RuntimePublisher.GCJoinOpcodeMessage" value="GCJoin" /> + <string id="RuntimePublisher.GCPerHeapHistoryOpcodeMessage" value="PerHeapHistory" /> + 
<string id="RuntimePublisher.GCGlobalHeapHistoryOpcodeMessage" value="GlobalHeapHistory" /> <string id="RuntimePublisher.FinalizeObjectOpcodeMessage" value="FinalizeObject" /> <string id="RuntimePublisher.BulkTypeOpcodeMessage" value="BulkType" /> <string id="RuntimePublisher.MethodLoadOpcodeMessage" value="Load" /> diff --git a/src/vm/ClrEtwAllMeta.lst b/src/vm/ClrEtwAllMeta.lst index 355fe08601..baa138dedf 100644 --- a/src/vm/ClrEtwAllMeta.lst +++ b/src/vm/ClrEtwAllMeta.lst @@ -101,6 +101,8 @@ nomac:GarbageCollection:::GCMarkHandles nostack:GarbageCollection:::GCMarkHandles nomac:GarbageCollection:::GCMarkOlderGenerationRoots nostack:GarbageCollection:::GCMarkOlderGenerationRoots +nomac:GarbageCollection:::GCMarkWithType +nostack:GarbageCollection:::GCMarkWithType nostack:GarbageCollection:::PinObjectAtGCTime nostack:GarbageCollection:::FinalizeObject nostack:GarbageCollection:::GCGenerationRange @@ -113,6 +115,11 @@ nostack:GarbageCollection:::GCBulkMovedObjectRanges nostack:GarbageCollection:::GCBulkRootCCW nostack:GarbageCollection:::GCBulkRCW nostack:GarbageCollection:::GCBulkRootStaticVar +nomac:GarbageCollection:::GCPerHeapHistory_V3 +nostack:GarbageCollection:::GCPerHeapHistory_V3 +nomac:GarbageCollection:::GCGlobalHeap_V2 +nostack:GarbageCollection:::GCGlobalHeap_V2 +nomac:GarbageCollection:::GCJoin_V2 ############# # Type events diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp index 123b415141..8721b54714 100644 --- a/src/vm/ceemain.cpp +++ b/src/vm/ceemain.cpp @@ -882,10 +882,7 @@ void EEStartupHelper(COINITIEE fFlags) // Fire the EE startup ETW event ETWFireEvent(EEStartupStart_V1); - - // Fire the runtime information ETW event - ETW::InfoLog::RuntimeInformation(ETW::InfoLog::InfoStructs::Normal); -#endif // FEATURE_EVENT_TRACE +#endif // FEATURE_EVENT_TRACE #ifdef FEATURE_IPCMAN // Give PerfMon a chance to hook up to us @@ -954,6 +951,9 @@ void EEStartupHelper(COINITIEE fFlags) IfFailGoLog(g_pConfig->sync()); } + // Fire the runtime information ETW event + ETW::InfoLog::RuntimeInformation(ETW::InfoLog::InfoStructs::Normal); + if (breakOnEELoad.val(CLRConfig::UNSUPPORTED_BreakOnEELoad) == 1) { #ifdef _DEBUG diff --git a/src/vm/codeman.cpp b/src/vm/codeman.cpp index bd9a82f270..77efda3d92 100644 --- a/src/vm/codeman.cpp +++ b/src/vm/codeman.cpp @@ -1518,10 +1518,6 @@ BOOL EEJitManager::LoadJIT() bool fUseRyuJit = (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_UseRyuJit) == 1); // uncached access, since this code is run no more than one time - // ****** TODO: Until the registry value is set by the .NET 4.6 installer, we pretend .NET 4.6 has been installed, which causes - // ****** RyuJit to be used by default. - fUseRyuJit = true; - if ((!IsCompilationProcess() || !fUseRyuJit) && // Use RyuJIT for all NGEN, unless we're falling back to JIT64 for everything. (newJitCompiler != nullptr)) // the main JIT must successfully load before we try loading the fallback JIT { diff --git a/src/vm/eventtrace.cpp b/src/vm/eventtrace.cpp index ccbd32d46b..c55c378005 100644 --- a/src/vm/eventtrace.cpp +++ b/src/vm/eventtrace.cpp @@ -4741,6 +4741,27 @@ VOID ETW::InfoLog::RuntimeInformation(INT32 type) #ifndef FEATURE_CORECLR startupFlags = CorHost2::GetStartupFlags(); + + // Some of the options specified by the startup flags can be overwritten by config files. 
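The eventtrace.cpp change that follows reshapes the reported startup flags so the ETW RuntimeInformation event reflects the effective configuration (concurrent GC, loader optimization, impersonation flow) rather than the raw host-supplied flags. The pattern is: clear the bit (or the whole multi-bit field), then re-set it from the config value. A small C# sketch of that pattern is below; the enum values are illustrative stand-ins for the native STARTUP_* constants and are not part of the patch.

using System;

[Flags]
enum StartupFlags : uint
{
    None                    = 0,
    ConcurrentGC            = 0x1,
    LoaderOptimizationMask  = 0x6,   // two-bit field, shifted left by 1
    LegacyImpersonation     = 0x10,
    AlwaysFlowImpersonation = 0x40,
}

static class ReportedFlags
{
    public static StartupFlags Reconcile(StartupFlags raw,
                                         bool concurrentGC,
                                         uint sharePolicy,      // 0 == unspecified
                                         bool legacyImpersonation,
                                         bool alwaysFlowImpersonation)
    {
        StartupFlags flags = raw;

        // Clear the bit, then re-set it from the effective config value.
        flags &= ~StartupFlags.ConcurrentGC;
        if (concurrentGC)
            flags |= StartupFlags.ConcurrentGC;

        // Multi-bit field: clear the whole mask, then OR in the shifted policy.
        if (sharePolicy != 0)
        {
            flags &= ~StartupFlags.LoaderOptimizationMask;
            flags |= (StartupFlags)(sharePolicy << 1);
        }

        flags &= ~(StartupFlags.LegacyImpersonation | StartupFlags.AlwaysFlowImpersonation);
        if (legacyImpersonation)
            flags |= StartupFlags.LegacyImpersonation;
        else if (alwaysFlowImpersonation)
            flags |= StartupFlags.AlwaysFlowImpersonation;

        return flags;
    }
}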
+ // Strictly speaking since the field in this event is called StartupFlags there's nothing + // wrong with just showing the actual startup flags but it makes it less useful (a more + // appropriate name for the field is StartupOptions). + startupFlags &= ~STARTUP_CONCURRENT_GC; + if (g_pConfig->GetGCconcurrent()) + startupFlags |= STARTUP_CONCURRENT_GC; + + if (g_pConfig->DefaultSharePolicy() != AppDomain::SHARE_POLICY_UNSPECIFIED) + { + startupFlags &= ~STARTUP_LOADER_OPTIMIZATION_MASK; + startupFlags |= g_pConfig->DefaultSharePolicy() << 1; + } + + startupFlags &= ~STARTUP_LEGACY_IMPERSONATION; + startupFlags &= ~STARTUP_ALWAYSFLOW_IMPERSONATION; + if (g_pConfig->ImpersonationMode() == IMP_NOFLOW) + startupFlags |= STARTUP_LEGACY_IMPERSONATION; + else if (g_pConfig->ImpersonationMode() == IMP_ALWAYSFLOW) + startupFlags |= STARTUP_ALWAYSFLOW_IMPERSONATION; #endif //!FEATURE_CORECLR // Determine the startupmode @@ -6391,12 +6412,105 @@ VOID ETW::MethodLog::SendEventsForNgenMethods(Module *pModule, DWORD dwEventOpti #endif // FEATURE_PREJIT } +// Called be ETW::MethodLog::SendEventsForJitMethods +// Sends the ETW events once our caller determines whether or not rejit locks can be acquired +VOID ETW::MethodLog::SendEventsForJitMethodsHelper(BaseDomain *pDomainFilter, + LoaderAllocator *pLoaderAllocatorFilter, + DWORD dwEventOptions, + BOOL fLoadOrDCStart, + BOOL fUnloadOrDCEnd, + BOOL fSendMethodEvent, + BOOL fSendILToNativeMapEvent, + BOOL fGetReJitIDs) +{ + CONTRACTL{ + THROWS; + GC_NOTRIGGER; + } CONTRACTL_END; + + EEJitManager::CodeHeapIterator heapIterator(pDomainFilter, pLoaderAllocatorFilter); + while (heapIterator.Next()) + { + MethodDesc * pMD = heapIterator.GetMethod(); + if (pMD == NULL) + continue; + + TADDR codeStart = heapIterator.GetMethodCode(); + + // Grab rejitID from the rejit manager. In some cases, such as collectible loader + // allocators, we don't support rejit so we need to short circuit the call. + // This also allows our caller to avoid having to pre-enter the rejit + // manager locks. + // see code:#TableLockHolder + ReJITID rejitID = + fGetReJitIDs ? pMD->GetReJitManager()->GetReJitIdNoLock(pMD, codeStart) : 0; + + // There are small windows of time where the heap iterator may come across a + // codeStart that is not yet published to the MethodDesc. This may happen if + // we're JITting the method right now on another thread, and have not completed + // yet. Detect the race, and skip the method if appropriate. (If rejitID is + // nonzero, there is no race, as GetReJitIdNoLock will not return a nonzero + // rejitID if the codeStart has not yet been published for that rejitted version + // of the method.) This check also catches recompilations due to EnC, which we do + // not want to issue events for, in order to ensure xperf's assumption that + // MethodDesc* + ReJITID + extent (hot vs. cold) form a unique key for code + // ranges of methods + if ((rejitID == 0) && (codeStart != PCODEToPINSTR(pMD->GetNativeCode()))) + continue; + + // When we're called to announce loads, then the methodload event itself must + // precede any supplemental events, so that the method load or method jitting + // event is the first event the profiler sees for that MethodID (and not, say, + // the MethodILToNativeMap event.) 
+ if (fLoadOrDCStart) + { + if (fSendMethodEvent) + { + ETW::MethodLog::SendMethodEvent( + pMD, + dwEventOptions, + TRUE, // bIsJit + NULL, // namespaceOrClassName + NULL, // methodName + NULL, // methodSignature + codeStart, + rejitID); + } + } + + // Send any supplemental events requested for this MethodID + if (fSendILToNativeMapEvent) + ETW::MethodLog::SendMethodILToNativeMapEvent(pMD, dwEventOptions, rejitID); + + // When we're called to announce unloads, then the methodunload event itself must + // come after any supplemental events, so that the method unload event is the + // last event the profiler sees for this MethodID + if (fUnloadOrDCEnd) + { + if (fSendMethodEvent) + { + ETW::MethodLog::SendMethodEvent( + pMD, + dwEventOptions, + TRUE, // bIsJit + NULL, // namespaceOrClassName + NULL, // methodName + NULL, // methodSignature + codeStart, + rejitID); + } + } + } +} + /****************************************************************************/ /* This routine sends back method events of type 'dwEventOptions', for all JITed methods in either a given LoaderAllocator (if pLoaderAllocatorFilter is non NULL) or in a given Domain (if pDomainFilter is non NULL) or for all methods (if both filters are null) */ /****************************************************************************/ +// Code review indicates this method is never called with both filters NULL. Ideally we would +// assert this and change the comment above, but given I am making a change late in the release I am being cautious VOID ETW::MethodLog::SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAllocator *pLoaderAllocatorFilter, DWORD dwEventOptions) { CONTRACTL { @@ -6424,9 +6538,6 @@ VOID ETW::MethodLog::SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAl (ETW::EnumerationLog::EnumerationStructs::MethodDCStartILToNativeMap | ETW::EnumerationLog::EnumerationStructs::MethodDCEndILToNativeMap)) != 0; - BOOL fCollectibleLoaderAllocatorFilter = - ((pLoaderAllocatorFilter != NULL) && (pLoaderAllocatorFilter->IsCollectible())); - if (fSendILToNativeMapEvent) { // The call to SendMethodILToNativeMapEvent assumes that the debugger's lazy @@ -6439,83 +6550,48 @@ VOID ETW::MethodLog::SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAl g_pDebugInterface->InitializeLazyDataIfNecessary(); } - // GetRejitIdNoLock requires that the rejit lock is taken already. We need to take - // it here, before CodeHeapIterator takes the SingleUseLock because that is defined - // ordering. - ReJitManager::TableLockHolder lksharedRejitMgrModule(SharedDomain::GetDomain()->GetReJitManager()); - ReJitManager::TableLockHolder lkRejitMgrModule(pDomainFilter->GetReJitManager()); - EEJitManager::CodeHeapIterator heapIterator(pDomainFilter, pLoaderAllocatorFilter); - while(heapIterator.Next()) + // #TableLockHolder: + // + // A word about ReJitManager::TableLockHolder... As we enumerate through the functions, + // we may need to grab their ReJITIDs. The ReJitManager grabs its table Crst in order to + // fetch these. However, several other kinds of locks are being taken during this + // enumeration, such as the SystemDomain lock and the EEJitManager::CodeHeapIterator's + // lock. In order to avoid lock-leveling issues, we grab the appropriate ReJitManager + // table locks after SystemDomain and before CodeHeapIterator. In particular, we need to + // grab the SharedDomain's ReJitManager table lock as well as the specific AppDomain's + // ReJitManager table lock for the current AppDomain we're iterating. 
Why the SharedDomain's + // ReJitManager lock? For any given AppDomain we're iterating over, the MethodDescs we + // find may be managed by that AppDomain's ReJitManger OR the SharedDomain's ReJitManager. + // (This is due to generics and whether given instantiations may be shared based on their + // arguments.) Therefore, we proactively take the SharedDomain's ReJitManager's table + // lock up front, and then individually take the appropriate AppDomain's ReJitManager's + // table lock that corresponds to the domain or module we're currently iterating over. + // + + // We only support getting rejit IDs when filtering by domain. + if (pDomainFilter) + { + ReJitManager::TableLockHolder lkRejitMgrSharedDomain(SharedDomain::GetDomain()->GetReJitManager()); + ReJitManager::TableLockHolder lkRejitMgrModule(pDomainFilter->GetReJitManager()); + SendEventsForJitMethodsHelper(pDomainFilter, + pLoaderAllocatorFilter, + dwEventOptions, + fLoadOrDCStart, + fUnloadOrDCEnd, + fSendMethodEvent, + fSendILToNativeMapEvent, + TRUE); + } + else { - MethodDesc * pMD = heapIterator.GetMethod(); - if (pMD == NULL) - continue; - - TADDR codeStart = heapIterator.GetMethodCode(); - - // Grab rejitID from the rejit manager. Short-circuit the call if we're filtering - // by a collectible loader allocator, since rejit is not supported on RefEmit - // assemblies. - ReJITID rejitID = - fCollectibleLoaderAllocatorFilter ? - 0 : - pMD->GetReJitManager()->GetReJitIdNoLock(pMD, codeStart); - - // There are small windows of time where the heap iterator may come across a - // codeStart that is not yet published to the MethodDesc. This may happen if - // we're JITting the method right now on another thread, and have not completed - // yet. Detect the race, and skip the method if appropriate. (If rejitID is - // nonzero, there is no race, as GetReJitIdNoLock will not return a nonzero - // rejitID if the codeStart has not yet been published for that rejitted version - // of the method.) This check also catches recompilations due to EnC, which we do - // not want to issue events for, in order to ensure xperf's assumption that - // MethodDesc* + ReJITID + extent (hot vs. cold) form a unique key for code - // ranges of methods - if ((rejitID == 0) && (codeStart != PCODEToPINSTR(pMD->GetNativeCode()))) - continue; - - // When we're called to announce loads, then the methodload event itself must - // precede any supplemental events, so that the method load or method jitting - // event is the first event the profiler sees for that MethodID (and not, say, - // the MethodILToNativeMap event.) 
- if (fLoadOrDCStart) - { - if (fSendMethodEvent) - { - ETW::MethodLog::SendMethodEvent( - pMD, - dwEventOptions, - TRUE, // bIsJit - NULL, // namespaceOrClassName - NULL, // methodName - NULL, // methodSignature - codeStart, - rejitID); - } - } - - // Send any supplemental events requested for this MethodID - if (fSendILToNativeMapEvent) - ETW::MethodLog::SendMethodILToNativeMapEvent(pMD, dwEventOptions, rejitID); - - // When we're called to announce unloads, then the methodunload event itself must - // come after any supplemental events, so that the method unload event is the - // last event the profiler sees for this MethodID - if (fUnloadOrDCEnd) - { - if (fSendMethodEvent) - { - ETW::MethodLog::SendMethodEvent( - pMD, - dwEventOptions, - TRUE, // bIsJit - NULL, // namespaceOrClassName - NULL, // methodName - NULL, // methodSignature - codeStart, - rejitID); - } - } + SendEventsForJitMethodsHelper(pDomainFilter, + pLoaderAllocatorFilter, + dwEventOptions, + fLoadOrDCStart, + fUnloadOrDCEnd, + fSendMethodEvent, + fSendILToNativeMapEvent, + FALSE); } } EX_CATCH{} EX_END_CATCH(SwallowAllExceptions); #endif // !DACCESS_COMPILE @@ -6859,6 +6935,12 @@ VOID ETW::EnumerationLog::EnumerationHelper(Module *moduleFilter, BaseDomain *do // Thus hitting a timeout due to a large number of methods will not affect modules rundown.tf g ETW::EnumerationLog::IterateModule(moduleFilter, enumerationOptions); + // As best I can tell from code review, these if statements below are never true. There is + // only one caller to this method that specifies a moduleFilter, ETW::LoaderLog::ModuleLoad. + // That method never specifies these flags. Because it is late in a release cycle I am not + // making a change, but if you see this comment early in the next release cycle consider + // deleting this apparently dead code. 
+ // DC End or Unload Jit Method events from all Domains if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnloadOrDCEndAny) { @@ -6895,6 +6977,7 @@ VOID ETW::EnumerationLog::EnumerationHelper(Module *moduleFilter, BaseDomain *do ETW::EnumerationLog::IterateAppDomain(pDomain, enumerationOptions); } } + ETW::EnumerationLog::IterateDomain(SharedDomain::GetDomain(), enumerationOptions); } } diff --git a/src/vm/gcinfodecoder.cpp b/src/vm/gcinfodecoder.cpp index 3fd6ca2fce..2d886d4ad4 100644 --- a/src/vm/gcinfodecoder.cpp +++ b/src/vm/gcinfodecoder.cpp @@ -53,6 +53,22 @@ } while (0) #endif // !VALIDATE_ROOT +#ifndef LOG_PIPTR +#define LOG_PIPTR(pObjRef, gcFlags, hCallBack) \ + { \ + GCCONTEXT* pGCCtx = (GCCONTEXT*)(hCallBack); \ + if (pGCCtx->sc->promotion) \ + { \ + LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */ \ + LOG_PIPTR_OBJECT_CLASS(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR)))); \ + } \ + else \ + { \ + LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */ \ + LOG_PIPTR_OBJECT(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR)))); \ + } \ + } +#endif // !LOG_PIPTR bool GcInfoDecoder::SetIsInterruptibleCB (UINT32 startOffset, UINT32 stopOffset, LPVOID hCallback) { @@ -1559,8 +1575,7 @@ void GcInfoDecoder::ReportRegisterToGC( // AMD64 VALIDATE_ROOT((gcFlags & GC_CALL_INTERIOR), hCallBack, pObjRef); - LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */ - LOG_PIPTR_OBJECT_CLASS(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR)))); + LOG_PIPTR(pObjRef, gcFlags, hCallBack); #endif //_DEBUG gcFlags |= CHECK_APP_DOMAIN; @@ -1657,8 +1672,7 @@ void GcInfoDecoder::ReportRegisterToGC( // ARM VALIDATE_ROOT((gcFlags & GC_CALL_INTERIOR), hCallBack, pObjRef); - LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */ - LOG_PIPTR_OBJECT_CLASS(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR)))); + LOG_PIPTR(pObjRef, gcFlags, hCallBack); #endif //_DEBUG gcFlags |= CHECK_APP_DOMAIN; @@ -1752,8 +1766,7 @@ void GcInfoDecoder::ReportRegisterToGC( // ARM64 VALIDATE_ROOT((gcFlags & GC_CALL_INTERIOR), hCallBack, pObjRef); - LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */ - LOG_PIPTR_OBJECT_CLASS(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR)))); + LOG_PIPTR(pObjRef, gcFlags, hCallBack); #endif //_DEBUG gcFlags |= CHECK_APP_DOMAIN; @@ -1893,8 +1906,7 @@ void GcInfoDecoder::ReportStackSlotToGC( VALIDATE_ROOT((gcFlags & GC_CALL_INTERIOR), hCallBack, pObjRef); - LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */ - LOG_PIPTR_OBJECT_CLASS(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR)))); + LOG_PIPTR(pObjRef, gcFlags, hCallBack); #endif gcFlags |= CHECK_APP_DOMAIN; diff --git a/src/vm/jitinterface.cpp b/src/vm/jitinterface.cpp index c12351adc5..a50590aac9 100644 --- a/src/vm/jitinterface.cpp +++ b/src/vm/jitinterface.cpp @@ -7850,7 +7850,6 @@ CorInfoInline CEEInfo::canInline (CORINFO_METHOD_HANDLE hCaller, } } - #ifdef PROFILING_SUPPORTED if (CORProfilerPresent()) { diff --git a/src/vm/rejit.cpp b/src/vm/rejit.cpp index 33ec16b83f..0233da062e 100644 --- a/src/vm/rejit.cpp +++ b/src/vm/rejit.cpp @@ -1498,9 +1498,16 @@ HRESULT ReJitManager::DoJumpStampIfNecessary(MethodDesc* pMD, PCODE pCode) pInfoToJumpStamp = FindPreReJittedReJitInfo(beginIter, endIter); if (pInfoToJumpStamp != NULL) { - // Found it. 
Jump-stamp, SetNativeCode, and we're done. _ASSERTE(pInfoToJumpStamp->GetMethodDesc() == pMD); - return pInfoToJumpStamp->JumpStampNativeCode(pCode); + // does it need to be jump-stamped? + if (pInfoToJumpStamp->GetState() != ReJitInfo::kJumpNone) + { + return S_OK; + } + else + { + return pInfoToJumpStamp->JumpStampNativeCode(pCode); + } } // In this case, try looking up by module / metadata token. This is the case where @@ -1521,6 +1528,19 @@ HRESULT ReJitManager::DoJumpStampIfNecessary(MethodDesc* pMD, PCODE pCode) return S_OK; } + // The placeholder may already have a rejit info for this MD, in which + // case we don't need to do any additional work + for (ReJitInfo * pInfo = pInfoPlaceholder->m_pShared->GetMethods(); pInfo != NULL; pInfo = pInfo->m_pNext) + { + if ((pInfo->GetKey().m_keyType == ReJitInfo::Key::kMethodDesc) && + (pInfo->GetMethodDesc() == pMD)) + { + // Any rejit info we find should already be jumpstamped + _ASSERTE(pInfo->GetState() != ReJitInfo::kJumpNone); + return S_OK; + } + } + #ifdef _DEBUG { Module * pModuleTest = NULL; diff --git a/src/zap/zapimage.cpp b/src/zap/zapimage.cpp index 1439bbad11..67bad6d1ee 100644 --- a/src/zap/zapimage.cpp +++ b/src/zap/zapimage.cpp @@ -1147,6 +1147,76 @@ HANDLE ZapImage::GenerateFile(LPCWSTR wszOutputFileName, CORCOMPILE_NGEN_SIGNATU return hFile; } +#ifdef FEATURE_FUSION +#define WOF_PROVIDER_FILE (0x00000002) + +typedef BOOL (WINAPI *WofShouldCompressBinaries_t) ( + __in LPCWSTR Volume, + __out PULONG Algorithm + ); + +typedef HRESULT (WINAPI *WofSetFileDataLocation_t) ( + __in HANDLE hFile, + __out ULONG Provider, + __in PVOID FileInfo, + __in ULONG Length + ); + +typedef struct _WOF_FILE_COMPRESSION_INFO { + ULONG Algorithm; +} WOF_FILE_COMPRESSION_INFO, *PWOF_FILE_COMPRESSION_INFO; + +// Check if files on the volume identified by volumeLetter should be compressed. +// If yes, compress the file associated with hFile. +static void CompressFile(WCHAR volumeLetter, HANDLE hFile) +{ + if (IsNgenOffline()) + { + return; + } + + // Wofutil.dll is available on Windows 8.1 and above. Return on platforms without wofutil.dll. + HModuleHolder wofLibrary(WszLoadLibraryEx(L"wofutil.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32)); + if (wofLibrary == nullptr) + { + return; + } + + // WofShouldCompressBinaries is available on Windows 10 and above. + // Windows 8.1 version of wofutil.dll does not have this function. + WofShouldCompressBinaries_t WofShouldCompressBinaries + = (WofShouldCompressBinaries_t)GetProcAddress(wofLibrary, "WofShouldCompressBinaries"); + if (WofShouldCompressBinaries == nullptr) + { + return; + } + + WCHAR volume[4] = L"X:\\"; + volume[0] = volumeLetter; + ULONG algorithm = 0; + + bool compressionSuitable = (WofShouldCompressBinaries(volume, &algorithm) == TRUE); + if (compressionSuitable) + { + // WofSetFileDataLocation is available on Windows 8.1 and above, however, Windows 8.1 version + // of WofSetFileDataLocation works for WIM only, and Windows 10 is required for compression of + // normal files. This isn't a problem for us, since the check for WofShouldCompressBinaries + // above should have already returned on Windows 8.1. 
+ WofSetFileDataLocation_t WofSetFileDataLocation = + (WofSetFileDataLocation_t)GetProcAddress(wofLibrary, "WofSetFileDataLocation"); + if (WofSetFileDataLocation == nullptr) + { + return; + } + + WOF_FILE_COMPRESSION_INFO fileInfo; + fileInfo.Algorithm = algorithm; + + WofSetFileDataLocation(hFile, WOF_PROVIDER_FILE, &fileInfo, sizeof(WOF_FILE_COMPRESSION_INFO)); + } +} +#endif + HANDLE ZapImage::SaveImage(LPCWSTR wszOutputFileName, CORCOMPILE_NGEN_SIGNATURE * pNativeImageSig) { if (!IsReadyToRunCompilation()) @@ -1174,6 +1244,10 @@ HANDLE ZapImage::SaveImage(LPCWSTR wszOutputFileName, CORCOMPILE_NGEN_SIGNATURE PrintStats(wszOutputFileName); #endif +#ifdef FEATURE_FUSION + CompressFile(wszOutputFileName[0], hFile); +#endif + return hFile; } diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp index 6cddacfc11..56835cac94 100644 --- a/src/zap/zapper.cpp +++ b/src/zap/zapper.cpp @@ -905,10 +905,6 @@ void Zapper::InitEE(BOOL fForceDebug, BOOL fForceProfile, BOOL fForceInstrument) static ConfigDWORD useRyuJitValue; bool fUseRyuJit = (useRyuJitValue.val(CLRConfig::INTERNAL_UseRyuJit) == 1); - // ****** TODO: Until the registry value is set by the .NET 4.6 installer, we pretend .NET 4.6 has been installed, which causes - // ****** RyuJit to be used by default. - fUseRyuJit = true; - if (!fUseRyuJit) // Do we need to fall back to JIT64 for NGEN? { LPCWSTR pwzJitName = MAKEDLLNAME_W(L"compatjit"); -- 2.34.1
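For reference, the wofutil.dll calls that CompressFile resolves via GetProcAddress map to the managed P/Invoke shape sketched below. This is illustrative only, derived from the typedefs in the patch above (WOF_PROVIDER_FILE, WofShouldCompressBinaries, WofSetFileDataLocation); it is not part of the change, and the native code deliberately probes with LoadLibrary/GetProcAddress rather than linking directly so it degrades gracefully on OS versions that lack these exports.

using System;
using System.Runtime.InteropServices;

static class WofInterop
{
    private const uint WOF_PROVIDER_FILE = 0x00000002;

    [StructLayout(LayoutKind.Sequential)]
    private struct WOF_FILE_COMPRESSION_INFO
    {
        public uint Algorithm;
    }

    // Available on Windows 10 and later; calling this on older systems throws
    // DllNotFoundException/EntryPointNotFoundException, which the native code
    // avoids by probing with GetProcAddress instead.
    [DllImport("wofutil.dll", CharSet = CharSet.Unicode, ExactSpelling = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    private static extern bool WofShouldCompressBinaries(string volume, out uint algorithm);

    [DllImport("wofutil.dll", ExactSpelling = true)]
    private static extern int WofSetFileDataLocation(IntPtr hFile, uint provider,
                                                     ref WOF_FILE_COMPRESSION_INFO fileInfo,
                                                     uint length);

    // Compress the file behind hFile if the volume's policy says binaries should be compressed.
    public static void CompressIfBeneficial(char volumeLetter, IntPtr hFile)
    {
        string volume = volumeLetter + @":\";
        uint algorithm;
        if (!WofShouldCompressBinaries(volume, out algorithm))
            return; // volume policy says compression is not beneficial here

        var info = new WOF_FILE_COMPRESSION_INFO { Algorithm = algorithm };
        WofSetFileDataLocation(hFile, WOF_PROVIDER_FILE, ref info,
                               (uint)Marshal.SizeOf(typeof(WOF_FILE_COMPRESSION_INFO)));
    }
}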