From f1c0c7d95164abbbc4dfbd5bc312ef543256b0e0 Mon Sep 17 00:00:00 2001
From: dotnet-bot <dotnet-bot@microsoft.com>
Date: Tue, 7 Apr 2015 17:23:32 -0700
Subject: [PATCH] Merge changes from parent branch

[tfs-changeset: 1448103]
---
 src/ToolBox/SOS/Strike/DisasmARM.cpp          |   1 +
 src/ToolBox/SOS/Strike/DisasmARM64.cpp        |   1 +
 src/ToolBox/SOS/Strike/disasm.cpp             |   4 +
 src/debug/di/process.cpp                      |  40 +-
 src/dirs.proj                                 |  13 +-
 src/dlls/mscoree/mscoree.settings.targets     |  21 +-
 src/gc/gc.cpp                                 | 456 ++++++++++--------
 src/gc/gcimpl.h                               |   1 +
 src/gc/gcpriv.h                               |  15 +-
 src/gc/gcrecord.h                             | 105 ++--
 src/gc/objecthandle.cpp                       |   8 +-
 src/inc/corprof.idl                           |   1 +
 src/inc/eventtrace.h                          |  22 +-
 src/inc/eventtracebase.h                      |  14 +-
 src/inc/fxretarget.h                          |   1 +
 src/inc/stdmacros.h                           |  10 +-
 src/jit/assertionprop.cpp                     |   4 +-
 src/jit/codegencommon.cpp                     |  24 +-
 src/jit/codegenxarch.cpp                      |  30 +-
 src/jit/compiler.h                            |  18 +-
 src/jit/flowgraph.cpp                         |  23 +-
 src/jit/gentree.cpp                           | 271 ++---------
 src/jit/gentree.h                             |   6 +-
 src/jit/lsra.cpp                              |  89 +++-
 src/jit/lsra.h                                |   6 +-
 src/jit/morph.cpp                             | 210 +++++---
 src/jit/optimizer.cpp                         |  97 +++-
 src/jit/rangecheck.cpp                        |  24 +-
 src/jit/simdcodegenxarch.cpp                  |  37 +-
 .../Eventing/FrameworkEventSource.cs          |  31 +-
 src/mscorlib/src/System/Environment.cs        |   7 +-
 .../src/System/Globalization/CultureInfo.cs   |  50 +-
 .../System/Globalization/DateTimeFormat.cs    |   1 +
 .../src/System/Globalization/DateTimeParse.cs |  11 +-
 .../src/System/Resources/ResourceManager.cs   |   8 +-
 .../Runtime/CompilerServices/TaskAwaiter.cs   |   2 +-
 .../Security/Permissions/FileIOPermission.cs  |   1 -
 src/mscorlib/src/System/String.cs             |   2 +
 .../src/System/Threading/ExecutionContext.cs  |   2 +
 .../src/System/Threading/Tasks/Task.cs        |  70 +--
 src/tools/crossgen/crossgen.nativeproj        |  10 +-
 src/vm/ClrEtwAll.man                          | 151 +++++-
 src/vm/ClrEtwAllMeta.lst                      |   7 +
 src/vm/ceemain.cpp                            |   8 +-
 src/vm/codeman.cpp                            |   4 -
 src/vm/eventtrace.cpp                         | 241 ++++++---
 src/vm/gcinfodecoder.cpp                      |  28 +-
 src/vm/jitinterface.cpp                       |   1 -
 src/vm/rejit.cpp                              |  24 +-
 src/zap/zapimage.cpp                          |  74 +++
 src/zap/zapper.cpp                            |   4 -
 51 files changed, 1453 insertions(+), 836 deletions(-)

diff --git a/src/ToolBox/SOS/Strike/DisasmARM.cpp b/src/ToolBox/SOS/Strike/DisasmARM.cpp
index 37f89c4002..0f86e7f071 100644
--- a/src/ToolBox/SOS/Strike/DisasmARM.cpp
+++ b/src/ToolBox/SOS/Strike/DisasmARM.cpp
@@ -34,6 +34,7 @@ namespace ARMGCDump
 #define LF_GCROOTS
 #define LL_INFO1000
 #define LOG(x)
+#define LOG_PIPTR(pObjRef, gcFlags, hCallBack)
 #define DAC_ARG(x)
 #include "gcdumpnonx86.cpp"
 }
diff --git a/src/ToolBox/SOS/Strike/DisasmARM64.cpp b/src/ToolBox/SOS/Strike/DisasmARM64.cpp
index 8c9f704c31..4bfae58ee1 100644
--- a/src/ToolBox/SOS/Strike/DisasmARM64.cpp
+++ b/src/ToolBox/SOS/Strike/DisasmARM64.cpp
@@ -37,6 +37,7 @@ namespace ARM64GCDump
 #define LF_GCROOTS
 #define LL_INFO1000
 #define LOG(x)
+#define LOG_PIPTR(pObjRef, gcFlags, hCallBack)
 #define DAC_ARG(x)
 #include "gcdumpnonx86.cpp"
 }
diff --git a/src/ToolBox/SOS/Strike/disasm.cpp b/src/ToolBox/SOS/Strike/disasm.cpp
index 93a3709b9a..0d27c63bf0 100644
--- a/src/ToolBox/SOS/Strike/disasm.cpp
+++ b/src/ToolBox/SOS/Strike/disasm.cpp
@@ -56,6 +56,10 @@ namespace X86GCDump
     #undef LOG
     #endif
     #define LOG(x) ((void)0)
+    #ifdef LOG_PIPTR
+    #undef LOG_PIPTR
+    #endif
+    #define LOG_PIPTR(pObjRef, gcFlags, hCallBack) ((void)0)
 #include "gcdumpnonx86.cpp"
 #endif // FEATURE_PAL
 #endif // SOS_TARGET_AMD64
diff --git a/src/debug/di/process.cpp b/src/debug/di/process.cpp
index 32da7dc2e6..167692be72 100644
--- a/src/debug/di/process.cpp
+++ b/src/debug/di/process.cpp
@@ -1045,8 +1045,7 @@ CordbProcess::~CordbProcess()
     // We shouldn't still be in Cordb's list of processes. Unfortunately, our root Cordb object
     // may have already been deleted b/c we're at the mercy of ref-counting, so we can't check.
     
-    if (m_sharedAppDomain)
-        delete m_sharedAppDomain;
+    _ASSERTE(m_sharedAppDomain == NULL);
         
     m_processMutex.Destroy();
     m_StopGoLock.Destroy();
@@ -1278,11 +1277,17 @@ void CordbProcess::NeuterChildren()
     m_ContinueNeuterList.NeuterAndClear(this);
 
     m_userThreads.NeuterAndClear(GetProcessLock());
-
+    
     m_pDefaultAppDomain = NULL;
 
     // Frees per-appdomain left-side resources. See assumptions above.
     m_appDomains.NeuterAndClear(GetProcessLock());
+    if (m_sharedAppDomain != NULL)
+    {
+        m_sharedAppDomain->Neuter();
+        m_sharedAppDomain->InternalRelease();
+        m_sharedAppDomain = NULL;
+    }
 
     m_steppers.NeuterAndClear(GetProcessLock());
 
@@ -8751,6 +8756,7 @@ CordbAppDomain * CordbProcess::GetSharedAppDomain()
         {
             delete pAD;
         }
+        m_sharedAppDomain->InternalAddRef();
     }
     
     return m_sharedAppDomain;
@@ -13093,9 +13099,31 @@ void CordbProcess::HandleDebugEventForInteropDebugging(const DEBUG_EVENT * pEven
                     fcd.action = HIJACK_ACTION_EXIT_UNHANDLED;
                 }
 
-                // if the user changed the context during this hijack or if it had the SingleStep flag set on it,
-                // then update the LS context
-                if (pUnmanagedThread->IsContextSet() || IsSSFlagEnabled(&tempContext))
+                //
+                // The LS context is restored here so that execution continues from the instruction following the one that caused the hijack.
+                // We shouldn't always restore the LS context though.
+                // Consider the following case where doing so can cause issues:
+                // The debuggee process hits an exception and calls KERNELBASE!RaiseException; the debugger gets the notification and
+                // prepares for a first-chance hijack. The debugger (DBI) saves the current thread context (see SetupFirstChanceHijackForSync), which is restored
+                // later below (see the SafeWriteThreadContext call) when the process is in the VEH (CLRVectoredExceptionHandlerShim->FirstChanceSuspendHijackWorker).
+                // The thread context that got saved (by SetupFirstChanceHijackForSync) was for when the thread was executing RaiseException, and when
+                // this context gets restored in the VEH, the thread resumes after the exception handler with a context that is not the same as the one with which
+                // it entered. This inconsistency can lead to bad execution code-paths or even a debuggee crash.
+                //
+                // Example case where we should definitely update the LS context:
+                // After a DbgBreakPoint call, the IP gets updated to point to the instruction after the int 3, and that is the context saved by the debugger.
+                // The IP in the context passed to the VEH still points at the int 3, though, and if we don't update the LS context in the VEH, the breakpoint
+                // instruction will get executed again.
+                //
+                // Here's the list of cases in which we update the LS context:
+                // * we know that the context was explicitly updated during this hijack, OR
+                // * the single-stepping flag was set on it originally, OR
+                // * this was a breakpoint event
+                // Note that the above list is a heuristic and it is possible that we will need to add more such cases in the future.
+                //
+                BOOL isBreakPointEvent = (pUnmanagedEvent->m_currentDebugEvent.dwDebugEventCode == EXCEPTION_DEBUG_EVENT &&
+                    pUnmanagedEvent->m_currentDebugEvent.u.Exception.ExceptionRecord.ExceptionCode == STATUS_BREAKPOINT);
+                if (pUnmanagedThread->IsContextSet() || IsSSFlagEnabled(&tempContext) || isBreakPointEvent)
                 {
                     _ASSERTE(fcd.pLeftSideContext != NULL);
                     LOG((LF_CORDB, LL_INFO10000, "W32ET::W32EL: updating LS context at 0x%p\n", fcd.pLeftSideContext));
diff --git a/src/dirs.proj b/src/dirs.proj
index 3599b7912b..ded6fb4267 100644
--- a/src/dirs.proj
+++ b/src/dirs.proj
@@ -41,13 +41,14 @@
         <ProjectFile Include="delayimp\delayimp.nativeproj" Condition="'$(BuildProjectName)' != 'CoreSys'"/>
         <ProjectFile Include="dlls\dirs.proj" />
         <ProjectFile Include="unwinder\dirs.proj" Condition="'$(TargetArch)' != 'i386'" />
+
+        <!-- In the CodeGen branch, we use RyuJIT for all JIT builds -->
+        <ProjectFile Include="jit\dirs.proj"   Condition="'$(_BuildBranch)' == 'CodeGen'" />
         
-	<!-- In FXMain we build RyuJIT only for amd64 and arm64 -->
-        <ProjectFile Include="jit32\dirs.proj" Condition="'$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64'"/>
-        <ProjectFile Include="jit\dirs.proj" Condition="'$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm64'"/>
-	<!-- In Codegen we always build RyuJIT this includes x86 as well as amd64 and arm64 ==>
-        <ProjectFile Include="jit\dirs.proj" />
-        -->
+        <!-- In other branches, we build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 -->
+        <ProjectFile Include="jit\dirs.proj"   Condition="('$(_BuildBranch)' != 'CodeGen') and ('$(BuildArchitecture)' == 'amd64' or  '$(BuildArchitecture)' == 'arm64')" />
+        <ProjectFile Include="jit32\dirs.proj" Condition="('$(_BuildBranch)' != 'CodeGen') and ('$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64')" />
+
         <ProjectFile Include="jit64\dirs.proj" />
 
         <ProjectFile Include="tools\dirs.proj" />
diff --git a/src/dlls/mscoree/mscoree.settings.targets b/src/dlls/mscoree/mscoree.settings.targets
index b82f1f51aa..70262350d4 100644
--- a/src/dlls/mscoree/mscoree.settings.targets
+++ b/src/dlls/mscoree/mscoree.settings.targets
@@ -20,7 +20,7 @@
         <LinkAdditionalOptions>$(LinkAdditionalOptions) /NXCOMPAT</LinkAdditionalOptions>
         <!-- /NOVCFEATURE forces linker to emit old .pdb format. It is required for scan.exe tool to work -->
         <LinkAdditionalOptions Condition="'$(BuildType)' == 'Checked' and '$(UseCoreToolset)' != 'true'">$(LinkAdditionalOptions) /NOVCFEATURE</LinkAdditionalOptions>
-	<LinkGenerateManifest Condition="'$(BuildForCoreSystem)' == 'true'">false</LinkGenerateManifest>
+        <LinkGenerateManifest Condition="'$(BuildForCoreSystem)' == 'true'">false</LinkGenerateManifest>
         <CDefines>$(CDefines);UNICODE;_UNICODE</CDefines>
         <PCHHeader>stdafx.h</PCHHeader>
         <EnableCxxPCHHeaders>true</EnableCxxPCHHeaders>
@@ -91,6 +91,17 @@
         <ImportLib Condition="'$(UseMsvcrt)'!='true' and '$(DebugBuild)' == 'true' and '$(BuildForCoreSystem)' != 'true'" Include="$(CrtLibPath)\libcpmtd.lib" />
     </ItemGroup>
 
+    <ItemGroup Condition="'$(FeatureMergeJitAndEngine)' == 'true'">
+        <ImportLib Include="$(ClrLibPath)\clrjit.lib" />
+
+        <!-- In the CodeGen branch, we use RyuJIT for all JIT builds -->
+        <ProjectReference Condition="'$(_BuildBranch)' == 'CodeGen'" Include="$(ClrSrcDirectory)jit\dll\jit.nativeproj" />
+
+        <!-- In other branches, we build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 -->
+        <ProjectReference Condition="'$(_BuildBranch)' != 'CodeGen' and ('$(BuildArchitecture)' == 'amd64' or  '$(BuildArchitecture)' == 'arm64')" Include="$(ClrSrcDirectory)jit\dll\jit.nativeproj" />
+        <ProjectReference Condition="'$(_BuildBranch)' != 'CodeGen' and ('$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64')" Include="$(ClrSrcDirectory)jit32\dll\jit.nativeproj" />
+    </ItemGroup>
+
     <ItemGroup>
         <ImportLib Include="$(ClrLibPath)\cee_wks.lib">
             <ProjectReference>$(ClrSrcDirectory)vm\wks\wks.nativeproj</ProjectReference>
@@ -102,14 +113,6 @@
         </ImportLib>
         <LinkPreCrtLibs Include="$(ClrLibPath)\utilcode.lib" />
 
-        <ImportLib Condition="'$(FeatureMergeJitAndEngine)'=='true' and ('$(BuildArchitecture)' != 'amd64' and '$(BuildArchitecture)' != 'arm64')" Include="$(ClrLibPath)\clrjit.lib">
-            <ProjectReference>$(ClrSrcDirectory)jit32\dll\jit.nativeproj</ProjectReference>
-        </ImportLib>
-
-        <ImportLib Condition="'$(FeatureMergeJitAndEngine)'=='true' and ('$(BuildArchitecture)' == 'amd64' or  '$(BuildArchitecture)' == 'arm64') and '$(BuildProjectName)' == 'CoreSys'" Include="$(ClrLibPath)\clrjit.lib">
-          <ProjectReference>$(ClrSrcDirectory)jit\dll\jit.nativeproj</ProjectReference>
-        </ImportLib>
- 
         <ImportLib Include="$(ClrLibPath)\ildbsymlib.lib">
             <ProjectReference>$(ClrSrcDirectory)debug\ildbsymlib\HostLocal\ildbsymlib.nativeproj</ProjectReference>
         </ImportLib>
diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp
index 873d336010..c031727534 100644
--- a/src/gc/gc.cpp
+++ b/src/gc/gc.cpp
@@ -23,29 +23,6 @@
 
 #define USE_INTROSORT
 
-// defines for ETW events.
-#define ETW_TYPE_GC_MARK_1              21   // after marking stack roots
-#define ETW_TYPE_GC_MARK_2              22   // after marking finalize queue roots
-#define ETW_TYPE_GC_MARK_3              23   // after marking handles
-#define ETW_TYPE_GC_MARK_4              24   // after marking cards
-
-#define ETW_TYPE_BGC_BEGIN              25
-#define ETW_TYPE_BGC_1ST_NONCON_END     26
-#define ETW_TYPE_BGC_1ST_CON_END        27
-#define ETW_TYPE_BGC_2ND_NONCON_BEGIN   28
-#define ETW_TYPE_BGC_2ND_NONCON_END     29
-#define ETW_TYPE_BGC_2ND_CON_BEGIN      30
-#define ETW_TYPE_BGC_2ND_CON_END        31
-#define ETW_TYPE_BGC_PLAN_END           32
-#define ETW_TYPE_BGC_SWEEP_END          33
-
-#define ETW_TYPE_BGC_DRAIN_MARK_LIST    34
-#define ETW_TYPE_BGC_REVISIT            35
-#define ETW_TYPE_BGC_OVERFLOW           36
-
-#define ETW_TYPE_ALLOC_WAIT_BEGIN       37
-#define ETW_TYPE_ALLOC_WAIT_END         38
-
 #if defined(GC_PROFILING) || defined(FEATURE_EVENT_TRACE)
 inline BOOL ShouldTrackMovementForProfilerOrEtw()
 {
@@ -133,7 +110,17 @@ static const char* const str_gc_reasons[] =
     "oos_loh",
     "induced_noforce",
     "gcstress",
-    "induced_lowmem"
+    "induced_lowmem",
+    "induced_compacting"
+};
+
+static const char* const str_gc_pause_modes[] = 
+{
+    "batch",
+    "interactive",
+    "low_latency",
+    "sustained_low_latency",
+    "no_gc"
 };
 #endif // defined(DT_LOG) || defined(TRACE_GC)
 
@@ -584,13 +571,26 @@ struct join_structure
     VOLATILE(BOOL) wait_done;
 };
 
-typedef enum _join_type {
-    type_last_join, type_join, type_restart
-} join_type;
+enum join_type 
+{
+    type_last_join = 0, 
+    type_join = 1, 
+    type_restart = 2, 
+    type_first_r_join = 3, 
+    type_r_join = 4
+};
+
+enum join_time 
+{
+    time_start = 0, 
+    time_end = 1
+};
 
-typedef enum _join_time {
-    time_start, time_end
-} join_time;
+enum join_heap_index
+{
+    join_heap_restart = 100,
+    join_heap_r_restart = 200
+};
 
 struct join_event
 {
@@ -664,9 +664,9 @@ public:
         }
     }
 
-    inline void fire_event (ULONG heap, join_time time, join_type type)
+    inline void fire_event (int heap, join_time time, join_type type, int join_id)
     {
-        FireEtwGCJoin_V1(heap, time, type, GetClrInstanceId());
+        FireEtwGCJoin_V2(heap, time, type, GetClrInstanceId(), join_id);
     }
 
     void join (gc_heap* gch, int join_id)
@@ -684,7 +684,7 @@ public:
             dprintf (JOIN_LOG, ("join%d(%d): Join() Waiting...join_lock is now %d", 
                 flavor, join_id, (LONG)(join_struct.join_lock)));
 
-            fire_event (gch->heap_number, time_start, type_join);
+            fire_event (gch->heap_number, time_start, type_join, join_id);
 
             //busy wait around the color
             if (color == join_struct.lock_color)
@@ -728,7 +728,7 @@ respin:
                     flavor, join_id, (LONG)(join_struct.join_lock)));
             }
 
-            fire_event (gch->heap_number, time_end, type_join);
+            fire_event (gch->heap_number, time_end, type_join, join_id);
 
             // last thread out should reset event
             if (FastInterlockDecrement(&join_struct.join_restart) == 0)
@@ -746,7 +746,7 @@ respin:
         }
         else
         {
-            fire_event (gch->heap_number, time_start, type_last_join);
+            fire_event (gch->heap_number, time_start, type_last_join, join_id);
 
             join_struct.joined_p = TRUE;
             dprintf (JOIN_LOG, ("join%d(%d): Last thread to complete the join, setting id", flavor, join_id));
@@ -785,7 +785,7 @@ respin:
             {
                 dprintf (JOIN_LOG, ("r_join() Waiting..."));
 
-                fire_event (gch->heap_number, time_start, type_join);
+                fire_event (gch->heap_number, time_start, type_join, join_id);
 
                 //busy wait around the color
                 if (!join_struct.wait_done)
@@ -822,7 +822,7 @@ respin:
                     dprintf (JOIN_LOG, ("r_join() done"));
                 }
 
-                fire_event (gch->heap_number, time_end, type_join);
+                fire_event (gch->heap_number, time_end, type_join, join_id);
 
 #ifdef JOIN_STATS
                 // parallel execution starts here
@@ -835,6 +835,7 @@ respin:
         }
         else
         {
+            fire_event (gch->heap_number, time_start, type_first_r_join, join_id);
             return TRUE;
         }
     }
@@ -877,7 +878,7 @@ respin:
         }
 #endif //JOIN_STATS
 
-        fire_event (100, time_start, type_restart);
+        fire_event (join_heap_restart, time_start, type_restart, -1);
         assert (join_struct.joined_p);
         join_struct.joined_p = FALSE;
         join_struct.join_lock = join_struct.n_threads;
@@ -889,7 +890,7 @@ respin:
 
 //        printf("Set joined_event %d\n", !join_struct.lock_color);
 
-        fire_event (100, time_end, type_restart);
+        fire_event (join_heap_restart, time_end, type_restart, -1);
 
 #ifdef JOIN_STATS
         start[thd] = GetCycleCount32();
@@ -906,8 +907,10 @@ respin:
     {
         if (join_struct.n_threads != 1)
         {
+            fire_event (join_heap_r_restart, time_start, type_restart, -1);
             join_struct.wait_done = TRUE;
             join_struct.joined_event[first_thread_arrived].Set();
+            fire_event (join_heap_r_restart, time_end, type_restart, -1);
         }
     }
 
@@ -2588,6 +2591,7 @@ alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST-1];
 
 dynamic_data gc_heap::dynamic_data_table [NUMBERGENERATIONS+1];
 gc_history_per_heap gc_heap::gc_data_per_heap;
+size_t gc_heap::maxgen_pinned_compact_before_advance = 0;
 
 SPTR_IMPL_NS_INIT(BYTE, WKS, gc_heap, alloc_allocated, 0);
 
@@ -2634,7 +2638,7 @@ SPTR_IMPL_NS_INIT(CFinalize, WKS, gc_heap, finalize_queue, 0);
 void gen_to_condemn_tuning::print (int heap_num)
 {
 #ifdef DT_LOG
-    dprintf (DT_LOG_0, ("condemned reasons"));
+    dprintf (DT_LOG_0, ("condemned reasons (%d %d)", condemn_reasons_gen, condemn_reasons_condition));
     dprintf (DT_LOG_0, ("%s", record_condemn_reasons_gen_header));
     gc_condemn_reason_gen r_gen;
     for (int i = 0; i < gcrg_max; i++)
@@ -2658,29 +2662,35 @@ void gen_to_condemn_tuning::print (int heap_num)
 
 void gc_generation_data::print (int heap_num, int gen_num)
 {
-#ifdef SIMPLE_DPRINTF
-#ifdef DT_LOG
-    dprintf (DT_LOG_0, ("[%2d]gen%d beg %Id fl %Id fo %Id end %Id fl %Id fo %Id in %Id out %Id surv %Id alloc %Id",
+#if defined(SIMPLE_DPRINTF) && defined(DT_LOG)
+    dprintf (DT_LOG_0, ("[%2d]gen%d beg %Id fl %Id fo %Id end %Id fl %Id fo %Id in %Id p %Id np %Id alloc %Id",
                 heap_num, gen_num, 
                 size_before, 
                 free_list_space_before, free_obj_space_before,
                 size_after, 
                 free_list_space_after, free_obj_space_after, 
-                in, out,
-                surv,
+                in, pinned_surv, npinned_surv,
                 new_allocation));
-#endif //DT_LOG
-#endif //SIMPLE_DPRINTF
+#endif //SIMPLE_DPRINTF && DT_LOG
 }
 
-void gc_history_per_heap::print (int heap_num)
+void gc_history_per_heap::print()
 {
-#ifdef DT_LOG
+#if defined(SIMPLE_DPRINTF) && defined(DT_LOG)
     for (int i = 0; i < (sizeof (gen_data)/sizeof (gc_generation_data)); i++)
     {
-        gen_data[i].print (heap_num, i);
+        gen_data[i].print (heap_index, i);
     }
-    dprintf (DT_LOG_0, ("[%2d]mp %d", heap_num, mem_pressure));
+
+    dprintf (DT_LOG_0, ("fla %Id flr %Id esa %Id ca %Id pa %Id paa %Id, rfle %d, ec %Id", 
+                    maxgen_size_info.free_list_allocated,
+                    maxgen_size_info.free_list_rejected,
+                    maxgen_size_info.end_seg_allocated,
+                    maxgen_size_info.condemned_allocated,
+                    maxgen_size_info.pinned_allocated,
+                    maxgen_size_info.pinned_allocated_advance,
+                    maxgen_size_info.running_free_list_efficiency,
+                    extra_gen0_committed));
 
     int mechanism = 0;
     gc_mechanism_descr* descr = 0;
@@ -2693,12 +2703,12 @@ void gc_history_per_heap::print (int heap_num)
         {
             descr = &gc_mechanisms_descr[(gc_mechanism_per_heap)i];
             dprintf (DT_LOG_0, ("[%2d]%s%s", 
-                        heap_num,
+                        heap_index,
                         descr->name, 
                         (descr->descr)[mechanism]));
         }
     }
-#endif //DT_LOG
+#endif //SIMPLE_DPRINTF && DT_LOG
 }
 
 void gc_history_global::print()
@@ -2713,44 +2723,69 @@ void gc_history_global::print()
         str_settings[i * 2] = (get_mechanism_p ((gc_global_mechanism_p)i) ? 'Y' : 'N');
     }
 
-    dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|"));
+    dprintf (DT_LOG_0, ("[hp]|c|p|o|d|b|e|"));
     dprintf (DT_LOG_0, ("%4d|%s", num_heaps, str_settings));
-    dprintf (DT_LOG_0, ("Condemned gen%d(%s), youngest budget %Id(%d)",
+    dprintf (DT_LOG_0, ("Condemned gen%d(reason: %s; mode: %s), youngest budget %Id(%d), memload %d",
                         condemned_generation,
                         str_gc_reasons[reason],
+                        str_gc_pause_modes[pause_mode],                        
                         final_youngest_desired,
-                        gen0_reduction_count));
+                        gen0_reduction_count,
+                        mem_pressure));
 #endif //DT_LOG
 }
 
+void gc_heap::fire_per_heap_hist_event (gc_history_per_heap* current_gc_data_per_heap, int heap_num)
+{
+    maxgen_size_increase* maxgen_size_info = &(current_gc_data_per_heap->maxgen_size_info);
+    FireEtwGCPerHeapHistory_V3(GetClrInstanceId(),
+                               (BYTE*)(maxgen_size_info->free_list_allocated),
+                               (BYTE*)(maxgen_size_info->free_list_rejected),
+                               (BYTE*)(maxgen_size_info->end_seg_allocated),
+                               (BYTE*)(maxgen_size_info->condemned_allocated),
+                               (BYTE*)(maxgen_size_info->pinned_allocated),
+                               (BYTE*)(maxgen_size_info->pinned_allocated_advance),
+                               maxgen_size_info->running_free_list_efficiency,
+                               current_gc_data_per_heap->gen_to_condemn_reasons.get_reasons0(),
+                               current_gc_data_per_heap->gen_to_condemn_reasons.get_reasons1(),
+                               current_gc_data_per_heap->mechanisms[gc_compact],
+                               current_gc_data_per_heap->mechanisms[gc_heap_expand],
+                               current_gc_data_per_heap->heap_index,
+                               (BYTE*)(current_gc_data_per_heap->extra_gen0_committed),
+                               (max_generation + 2),
+                               sizeof (gc_generation_data),
+                               &(current_gc_data_per_heap->gen_data[0]));
+
+    current_gc_data_per_heap->print();
+    current_gc_data_per_heap->gen_to_condemn_reasons.print (heap_num);
+}
+
 void gc_heap::fire_pevents()
 {
 #ifndef CORECLR
     settings.record (&gc_data_global);
     gc_data_global.print();
 
-    FireEtwGCGlobalHeapHistory_V1(gc_data_global.final_youngest_desired, 
+    FireEtwGCGlobalHeapHistory_V2(gc_data_global.final_youngest_desired, 
                                   gc_data_global.num_heaps, 
                                   gc_data_global.condemned_generation, 
                                   gc_data_global.gen0_reduction_count, 
                                   gc_data_global.reason, 
                                   gc_data_global.global_mechanims_p, 
-                                  GetClrInstanceId());
+                                  GetClrInstanceId(),
+                                  gc_data_global.pause_mode, 
+                                  gc_data_global.mem_pressure);
 
 #ifdef MULTIPLE_HEAPS
     for (int i = 0; i < gc_heap::n_heaps; i++)
     {
         gc_heap* hp = gc_heap::g_heaps[i];
         gc_history_per_heap* current_gc_data_per_heap = hp->get_gc_data_per_heap();
-        current_gc_data_per_heap->print (i);
-        current_gc_data_per_heap->gen_to_condemn_reasons.print (i);
-        FireEtwGCPerHeapHistorySpecial(*current_gc_data_per_heap, sizeof(hp->gc_data_per_heap), (UINT8)GetClrInstanceId());        
+        fire_per_heap_hist_event (current_gc_data_per_heap, hp->heap_number);
     }
 #else
     gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();
-    FireEtwGCPerHeapHistorySpecial(*current_gc_data_per_heap, sizeof(gc_data_per_heap), (UINT8)GetClrInstanceId());        
-    current_gc_data_per_heap->print (0);
-    current_gc_data_per_heap->gen_to_condemn_reasons.print (heap_number);
+    fire_per_heap_hist_event (current_gc_data_per_heap, heap_number);
 #endif    
 #endif //!CORECLR
 }
@@ -4159,10 +4194,15 @@ static size_t get_valid_segment_size (BOOL large_seg=FALSE)
     }
 
 #ifdef MULTIPLE_HEAPS
-    if (g_SystemInfo.dwNumberOfProcessors > 4)
-        initial_seg_size /= 2;
-    if (g_SystemInfo.dwNumberOfProcessors > 8)
-        initial_seg_size /= 2;
+#ifdef _WIN64
+    if (!large_seg)
+#endif //_WIN64
+    {
+        if (g_SystemInfo.dwNumberOfProcessors > 4)
+            initial_seg_size /= 2;
+        if (g_SystemInfo.dwNumberOfProcessors > 8)
+            initial_seg_size /= 2;
+    }
 #endif //MULTIPLE_HEAPS
 
     // if seg_size is small but not 0 (0 is default if config not set)
@@ -5502,6 +5542,8 @@ void gc_mechanisms::record (gc_history_global* history)
     history->condemned_generation = condemned_generation;
     history->gen0_reduction_count = gen0_reduction_count;
     history->reason = reason;
+    history->pause_mode = (int)pause_mode;
+    history->mem_pressure = entry_memory_load;
     history->global_mechanims_p = 0;
 
     // start setting the boolean values.
@@ -6389,7 +6431,7 @@ BYTE*& card_table_lowest_address (DWORD* c_table)
 
 DWORD* translate_card_table (DWORD* ct)
 {
-    return (DWORD*)((BYTE*)ct - size_card_of (0, card_table_lowest_address( ct)));
+    return (DWORD*)((BYTE*)ct - card_word (gcard_of (card_table_lowest_address (ct))) * sizeof(DWORD));
 }
 
 inline
@@ -6805,15 +6847,29 @@ int gc_heap::grow_brick_card_tables (BYTE* start,
             GetProcessMemoryLoad (&st);
             BYTE* top = (BYTE*)0 + Align ((size_t)(st.ullTotalVirtual));
             size_t ps = ha-la;
-            BYTE* highest = max ((saved_g_lowest_address + 2*ps), saved_g_highest_address);
-            //BYTE* highest = saved_g_highest_address;
-            if (highest > top)
+#ifdef _WIN64
+            if (ps > (ULONGLONG)200*1024*1024*1024)
+                ps += (ULONGLONG)100*1024*1024*1024;
+            else
+#endif //_WIN64
+                ps *= 2;
+
+            if (saved_g_lowest_address < g_lowest_address)
             {
-                highest = top;
+                if (ps > (size_t)g_lowest_address)
+                    saved_g_lowest_address = (BYTE*)OS_PAGE_SIZE;
+                else
+                {
+                    assert (((size_t)g_lowest_address - ps) >= OS_PAGE_SIZE);
+                    saved_g_lowest_address = min (saved_g_lowest_address, (g_lowest_address - ps));
+                }
             }
-            if (highest > saved_g_highest_address)
+
+            if (saved_g_highest_address > g_highest_address)
             {
-                saved_g_highest_address = highest;
+                saved_g_highest_address = max ((saved_g_lowest_address + ps), saved_g_highest_address);
+                if (saved_g_highest_address > top)
+                    saved_g_highest_address = top;
             }
         }
         dprintf (GC_TABLE_LOG, ("Growing card table [%Ix, %Ix[",
@@ -10624,6 +10680,15 @@ alloc_list& allocator::alloc_list_of (unsigned int bn)
         return buckets [bn-1];
 }
 
+size_t& allocator::alloc_list_damage_count_of (unsigned int bn)
+{
+    assert (bn < num_buckets);
+    if (bn == 0)
+        return first_bucket.alloc_list_damage_count();
+    else
+        return buckets [bn-1].alloc_list_damage_count();
+}
+
 void allocator::unlink_item (unsigned int bn, BYTE* item, BYTE* prev_item, BOOL use_undo_p)
 {
     //unlink the free_item
@@ -10632,7 +10697,9 @@ void allocator::unlink_item (unsigned int bn, BYTE* item, BYTE* prev_item, BOOL
     {
         if (use_undo_p && (free_list_undo (prev_item) == UNDO_EMPTY))
         {
+            assert (item == free_list_slot (prev_item));
             free_list_undo (prev_item) = item;
+            alloc_list_damage_count_of (bn)++;
         }
         free_list_slot (prev_item) = free_list_slot(item);
     }
@@ -10748,17 +10815,21 @@ void allocator::copy_from_alloc_list (alloc_list* fromalist)
             //new items may have been added during the plan phase 
             //items may have been unlinked. 
             BYTE* free_item = alloc_list_head_of (i);
-            while (free_item)
+            size_t count =  alloc_list_damage_count_of (i);
+            while (free_item && count)
             {
                 assert (((CObjectHeader*)free_item)->IsFree());
                 if ((free_list_undo (free_item) != UNDO_EMPTY))
                 {
+                    count--;
                     free_list_slot (free_item) = free_list_undo (free_item);
                     free_list_undo (free_item) = UNDO_EMPTY;
                 }
 
                 free_item = free_list_slot (free_item);
             }
+
+            alloc_list_damage_count_of (i) = 0; 
         }
 #ifdef DEBUG
         BYTE* tail_item = alloc_list_tail_of (i);
@@ -10776,19 +10847,28 @@ void allocator::commit_alloc_list_changes()
         {
             //remove the undo info from list. 
             BYTE* free_item = alloc_list_head_of (i);
-            while (free_item)
+            size_t count = alloc_list_damage_count_of (i);
+            while (free_item && count)
             {
                 assert (((CObjectHeader*)free_item)->IsFree());
-                free_list_undo (free_item) = UNDO_EMPTY;
+
+                if (free_list_undo (free_item) != UNDO_EMPTY)
+                {
+                    free_list_undo (free_item) = UNDO_EMPTY;
+                    count--;
+                }
+
                 free_item = free_list_slot (free_item);
             }
+
+            alloc_list_damage_count_of (i) = 0; 
         }
     }
 }
 
 void gc_heap::adjust_limit_clr (BYTE* start, size_t limit_size,
                                 alloc_context* acontext, heap_segment* seg,
-                                int align_const)
+                                int align_const, int gen_number)
 {
     //probably should pass seg==0 for free lists.
     if (seg)
@@ -10809,7 +10889,9 @@ void gc_heap::adjust_limit_clr (BYTE* start, size_t limit_size,
             dprintf (3, ("filling up hole [%Ix, %Ix[", (size_t)hole, (size_t)hole + size + Align (min_obj_size, align_const)));
             // when we are finishing an allocation from a free list
             // we know that the free area was Align(min_obj_size) larger
-            make_unused_array (hole, size + Align (min_obj_size, align_const));
+            size_t free_obj_size = size + Align (min_obj_size, align_const);
+            make_unused_array (hole, free_obj_size);
+            generation_free_obj_space (generation_of (gen_number)) += free_obj_size;
         }
         acontext->alloc_ptr = start;
     }
@@ -11273,7 +11355,7 @@ BOOL gc_heap::a_fit_free_list_p (int gen_number,
                     }
                     generation_free_list_space (gen) -= limit;
 
-                    adjust_limit_clr (free_list, limit, acontext, 0, align_const);
+                    adjust_limit_clr (free_list, limit, acontext, 0, align_const, gen_number);
 
                     can_fit = TRUE;
                     goto end;
@@ -11465,7 +11547,7 @@ BOOL gc_heap::a_fit_free_list_large_p (size_t size,
                     else
 #endif //BACKGROUND_GC
                     {
-                        adjust_limit_clr (free_list, limit, acontext, 0, align_const);
+                        adjust_limit_clr (free_list, limit, acontext, 0, align_const, gen_number);
                     }
 
                     //fix the limit to compensate for adjust_limit_clr making it too short 
@@ -11579,7 +11661,7 @@ found_fit:
     else
 #endif //BACKGROUND_GC
     {
-        adjust_limit_clr (old_alloc, limit, acontext, seg, align_const);
+        adjust_limit_clr (old_alloc, limit, acontext, seg, align_const, gen_number);
     }
 
     return TRUE;
@@ -12110,7 +12192,7 @@ BOOL gc_heap::check_and_wait_for_bgc (alloc_wait_reason awr,
     {
         bgc_in_progress = TRUE;
         size_t last_full_compact_gc_count = get_full_compact_gc_count();
-        wait_for_background (awr_loh_oos_bgc);
+        wait_for_background (awr);
         size_t current_full_compact_gc_count = get_full_compact_gc_count();
         if (current_full_compact_gc_count > last_full_compact_gc_count)
         {
@@ -12173,7 +12255,7 @@ BOOL gc_heap::trigger_full_compact_gc (gc_reason gr,
 #ifdef BACKGROUND_GC
     if (recursive_gc_sync::background_running_p())
     {
-        wait_for_background (awr_loh_oos_bgc);
+        wait_for_background ((gr == reason_oos_soh) ? awr_gen0_oos_bgc : awr_loh_oos_bgc);
         dprintf (2, ("waited for BGC - done"));
     }
 #endif //BACKGROUND_GC
@@ -13141,11 +13223,13 @@ BYTE* gc_heap::allocate_in_older_generation (generation* gen, size_t size,
                     dprintf (3, ("considering free list %Ix", (size_t)free_list));
 
                     size_t free_list_size = unused_array_size (free_list);
+
                     if (size_fit_p (size REQD_ALIGN_AND_OFFSET_ARG, free_list, (free_list + free_list_size),
                                     old_loc, USE_PADDING_TAIL | pad_in_front))
                     {
                         dprintf (4, ("F:%Ix-%Id",
                                      (size_t)free_list, free_list_size));
+
                         gen_allocator->unlink_item (a_l_idx, free_list, prev_free_item, !discard_p);
                         generation_free_list_space (gen) -= free_list_size;
                         remove_gen_free (gen->gen_num, free_list_size);
@@ -13879,6 +13963,7 @@ int gc_heap::joined_generation_to_condemn (BOOL should_evaluate_elevation,
             else
             {
                 n = max_generation - 1;
+                gc_data_global.set_mechanism_p (global_elevation);
             }
         }
         else
@@ -14278,13 +14363,13 @@ int gc_heap::generation_to_condemn (int n_initial,
             dprintf (GTC_LOG, ("ml: %d", ms.dwMemoryLoad));
         }
 
-#ifdef _WIN64
         if (heap_number == 0)
         {
+#ifdef _WIN64
             available_physical_mem = ms.ullAvailPhys;
+#endif //_WIN64
             local_settings->entry_memory_load = ms.dwMemoryLoad;
         }
-#endif //_WIN64
         
         // @TODO: Force compaction more often under GCSTRESS
         if (ms.dwMemoryLoad >= 90 || low_memory_detected)
@@ -14394,6 +14479,7 @@ int gc_heap::generation_to_condemn (int n_initial,
                 dprintf (GTC_LOG, ("%Id left in gen2 alloc (%Id)", 
                     dd_new_allocation (dd_max), dd_desired_allocation (dd_max)));
                 n = max_generation;
+                local_condemn_reasons->set_condition (gen_almost_max_alloc);
             }
         }
 
@@ -14521,7 +14607,6 @@ exit:
 
         if (check_memory)
         {
-            gc_data_per_heap.mem_pressure = ms.dwMemoryLoad;
             fgm_result.available_pagefile_mb = (size_t)(ms.ullAvailPageFile / (1024 * 1024));
         }
 
@@ -14844,7 +14929,8 @@ void gc_heap::gc1()
 
         if (n != max_generation)
         {
-            for (int gen_number = (n+1); gen_number <= (max_generation+1); gen_number++)
+            int gen_num_for_data = ((n < (max_generation - 1)) ? (n + 1) : (max_generation + 1));
+            for (int gen_number = (n + 1); gen_number <= gen_num_for_data; gen_number++)
             {
                 gc_data_per_heap.gen_data[gen_number].size_after = generation_size (gen_number);
                 gc_data_per_heap.gen_data[gen_number].free_list_space_after = generation_free_list_space (generation_of (gen_number));
@@ -14852,6 +14938,8 @@ void gc_heap::gc1()
             }
         }
 
+        gc_data_per_heap.maxgen_size_info.running_free_list_efficiency = (DWORD)(generation_allocator_efficiency (generation_of (max_generation)) * 100);
+
         free_list_info (max_generation, "after computing new dynamic data");
         
         if (heap_number == 0)
@@ -14860,7 +14948,7 @@ void gc_heap::gc1()
                 dd_collection_count (dynamic_data_of (0)), 
                 settings.condemned_generation,
                 dd_gc_elapsed_time (dynamic_data_of (0))));
-       }
+        }
 
         for (int gen_number = 0; gen_number <= (max_generation + 1); gen_number++)
         {
@@ -14872,6 +14960,7 @@ void gc_heap::gc1()
     if (n < max_generation)
     {
         compute_promoted_allocation (1 + n);
+
         dynamic_data* dd = dynamic_data_of (1 + n);
         size_t new_fragmentation = generation_free_list_space (generation_of (1 + n)) + 
                                    generation_free_obj_space (generation_of (1 + n));
@@ -15912,6 +16001,8 @@ int gc_heap::garbage_collect (int n)
 
         memset (&gc_data_per_heap, 0, sizeof (gc_data_per_heap));
         gc_data_per_heap.heap_index = heap_number;
+        if (heap_number == 0)
+            memset (&gc_data_global, 0, sizeof (gc_data_global));
         memset (&fgm_result, 0, sizeof (fgm_result));
         settings.reason = gc_trigger_reason;
         verify_pinned_queue_p = FALSE;
@@ -16108,14 +16199,17 @@ int gc_heap::garbage_collect (int n)
 #endif //MULTIPLE_HEAPS
         }
 
-        for (int i = 0; i <= (max_generation+1); i++)
         {
-            gc_data_per_heap.gen_data[i].size_before = generation_size (i);
-            generation* gen = generation_of (i);
-            gc_data_per_heap.gen_data[i].free_list_space_before = generation_free_list_space (gen);
-            gc_data_per_heap.gen_data[i].free_obj_space_before = generation_free_obj_space (gen);
+            int gen_num_for_data = ((settings.condemned_generation < (max_generation - 1)) ? 
+                                    (settings.condemned_generation + 1) : (max_generation + 1));
+            for (int i = 0; i <= gen_num_for_data; i++)
+            {
+                gc_data_per_heap.gen_data[i].size_before = generation_size (i);
+                generation* gen = generation_of (i);
+                gc_data_per_heap.gen_data[i].free_list_space_before = generation_free_list_space (gen);
+                gc_data_per_heap.gen_data[i].free_obj_space_before = generation_free_obj_space (gen);
+            }
         }
-
         descr_generations (TRUE);
 //    descr_card_table();
 
@@ -18523,9 +18617,16 @@ size_t gc_heap::get_total_heap_size()
     return total_heap_size;
 }
 
+void fire_mark_event (int heap_num, int root_type, size_t bytes_marked)
+{
+    dprintf (DT_LOG_0, ("-----------[%d]mark %d: %Id", heap_num, root_type, bytes_marked));
+    FireEtwGCMarkWithType (heap_num, GetClrInstanceId(), root_type, bytes_marked);
+}
+
 //returns TRUE is an overflow happened.
 BOOL gc_heap::process_mark_overflow(int condemned_gen_number)
 {
+    size_t last_promoted_bytes = promoted_bytes (heap_number);
     BOOL  overflow_p = FALSE;
 recheck:
     if ((! (max_overflow_address == 0) ||
@@ -18563,6 +18664,10 @@ recheck:
         goto recheck;
     }
 
+    size_t current_promoted_bytes = promoted_bytes (heap_number);
+
+    if (current_promoted_bytes != last_promoted_bytes)
+        fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_OVERFLOW, (current_promoted_bytes - last_promoted_bytes));
     return overflow_p;
 }
 
@@ -18635,37 +18740,6 @@ void gc_heap::process_mark_overflow_internal (int condemned_gen_number,
     }
 }
 
-inline 
-void fire_mark_event (int heap_num, int mark_num)
-{
-    switch(mark_num)
-    {
-        case ETW_TYPE_GC_MARK_1:
-            FireEtwGCMarkStackRoots(heap_num, GetClrInstanceId());
-            FireEtwPrvGCMarkStackRoots_V1(heap_num, GetClrInstanceId());
-            break;
-
-        case ETW_TYPE_GC_MARK_2:
-            FireEtwGCMarkFinalizeQueueRoots(heap_num, GetClrInstanceId());
-            FireEtwPrvGCMarkFinalizeQueueRoots_V1(heap_num, GetClrInstanceId());
-            break;
-
-        case ETW_TYPE_GC_MARK_3:
-            FireEtwGCMarkHandles(heap_num, GetClrInstanceId());
-            FireEtwPrvGCMarkHandles_V1(heap_num, GetClrInstanceId());
-            break;
-
-        case ETW_TYPE_GC_MARK_4:
-            FireEtwGCMarkOlderGenerationRoots(heap_num, GetClrInstanceId());
-            FireEtwPrvGCMarkCards_V1(heap_num, GetClrInstanceId());
-            break;
-
-        default:
-            _ASSERTE(mark_num==ETW_TYPE_GC_MARK_1 || mark_num==ETW_TYPE_GC_MARK_2 || mark_num==ETW_TYPE_GC_MARK_3 || mark_num==ETW_TYPE_GC_MARK_4);
-            break;
-    }
-}
-
 // Scanning for promotion for dependent handles need special handling. Because the primary holds a strong
 // reference to the secondary (when the primary itself is reachable) and this can cause a cascading series of
 // promotions (the secondary of one handle is or promotes the primary of another) we might need to perform the
@@ -18887,6 +18961,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
         gen0_must_clear_bricks--;
 #endif //FFIND_OBJECT
 
+    size_t last_promoted_bytes = 0;
+
     promoted_bytes (heap_number) = 0;
     reset_mark_stack();
 
@@ -18976,6 +19052,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
         if ((condemned_gen_number == max_generation) && (num_sizedrefs > 0))
         {
             CNameSpace::GcScanSizedRefs(GCHeap::Promote, condemned_gen_number, max_generation, &sc);
+            fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_SIZEDREF, (promoted_bytes (heap_number) - last_promoted_bytes));
+            last_promoted_bytes = promoted_bytes (heap_number);
 
 #ifdef MULTIPLE_HEAPS
             gc_t_join.join(this, gc_join_scan_sizedref_done);
@@ -18993,7 +19071,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
                                 condemned_gen_number, max_generation,
                                 &sc);
 
-        fire_mark_event (heap_number, ETW_TYPE_GC_MARK_1);
+        fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_STACK, (promoted_bytes (heap_number) - last_promoted_bytes));
+        last_promoted_bytes = promoted_bytes (heap_number);
 
 #ifdef BACKGROUND_GC
         if (recursive_gc_sync::background_running_p())
@@ -19007,7 +19086,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
         finalize_queue->GcScanRoots(GCHeap::Promote, heap_number, 0);
 #endif // FEATURE_PREMORTEM_FINALIZATION
 
-        fire_mark_event (heap_number, ETW_TYPE_GC_MARK_2);
+        fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_FQ, (promoted_bytes (heap_number) - last_promoted_bytes));
+        last_promoted_bytes = promoted_bytes (heap_number);
 
 // MTHTS
         {
@@ -19016,7 +19096,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
             CNameSpace::GcScanHandles(GCHeap::Promote,
                                       condemned_gen_number, max_generation,
                                       &sc);
-            fire_mark_event (heap_number, ETW_TYPE_GC_MARK_3);
+            fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_HANDLES, (promoted_bytes (heap_number) - last_promoted_bytes));
+            last_promoted_bytes = promoted_bytes (heap_number);
         }
 
 #ifdef TRACE_GC
@@ -19060,7 +19141,8 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
 
             dprintf (3, ("marked by cards: %Id", 
                 (promoted_bytes (heap_number) - promoted_before_cards)));
-            fire_mark_event (heap_number, ETW_TYPE_GC_MARK_4);
+            fire_mark_event (heap_number, ETW::GCLog::ETW_GC_INFO::GC_ROOT_OLDER, (promoted_bytes (heap_number) - last_promoted_bytes));
+            last_promoted_bytes = promoted_bytes (heap_number);
         }
     }
 
@@ -19659,7 +19741,7 @@ size_t gc_heap::update_brick_table (BYTE* tree, size_t current_brick,
 void gc_heap::plan_generation_start (generation* gen, generation* consing_gen, BYTE* next_plug_to_allocate)
 {
 #ifdef _WIN64
-    // We should never demote big plugs to ephemeral generations.
+    // We should never demote big plugs to gen0.
     if (gen == youngest_generation)
     {
         heap_segment* seg = ephemeral_heap_segment;
@@ -19928,9 +20010,11 @@ retry:
 
         if (active_new_gen_number != max_generation)
         {
-            if ((active_new_gen_number == (max_generation - 1)) && !demote_gen1_p)
+            if (active_new_gen_number == (max_generation - 1))
             {
-                advance_pins_for_demotion (consing_gen);
+                maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation));
+                if (!demote_gen1_p)
+                    advance_pins_for_demotion (consing_gen);
             }
 
             plan_generation_start (generation_of (active_new_gen_number), consing_gen, x);
@@ -21294,6 +21378,7 @@ void gc_heap::plan_phase (int condemned_gen_number)
                     else
                     {
                         allocate_in_condemned = TRUE;
+
                         new_address = allocate_in_condemned_generations (consing_gen, ps, active_old_gen_number, 
 #ifdef SHORT_PLUGS
                                                                          &convert_to_pinned_p,
@@ -21514,9 +21599,11 @@ void gc_heap::plan_phase (int condemned_gen_number)
                 {
                     active_new_gen_number--;
 
-                    if ((active_new_gen_number == (max_generation - 1)) && !demote_gen1_p)
+                    if (active_new_gen_number == (max_generation - 1))
                     {
-                        advance_pins_for_demotion (consing_gen);
+                        maxgen_pinned_compact_before_advance = generation_pinned_allocation_compact_size (generation_of (max_generation));
+                        if (!demote_gen1_p)
+                            advance_pins_for_demotion (consing_gen);
                     }
 
                     generation* gen = generation_of (active_new_gen_number);
@@ -21628,14 +21715,11 @@ void gc_heap::plan_phase (int condemned_gen_number)
 #endif //SIMPLE_DPRINTF
     }
 
-#ifdef FREE_USAGE_STATS
     if (settings.condemned_generation == (max_generation - 1 ))
     {
         size_t plan_gen2_size = generation_plan_size (max_generation);
         size_t growth = plan_gen2_size - old_gen2_size;
 
-        dprintf (1, ("gen2's FL effi: %d", (int)(generation_allocator_efficiency (generation_of (max_generation)) * 100)));
-
         if (growth > 0)
         {
             dprintf (1, ("gen2 grew %Id (end seg alloc: %Id, gen1 c alloc: %Id", 
@@ -21649,9 +21733,11 @@ void gc_heap::plan_phase (int condemned_gen_number)
                 generation_condemned_allocated (generation_of (max_generation - 1))));
         }
 
-        generation*  older_gen = generation_of (settings.condemned_generation + 1);
+        generation* older_gen = generation_of (settings.condemned_generation + 1);
         size_t rejected_free_space = generation_free_obj_space (older_gen) - r_free_obj_space;
-        size_t free_allocated = generation_free_list_allocated (older_gen) - r_older_gen_free_list_allocated;
+        size_t free_list_allocated = generation_free_list_allocated (older_gen) - r_older_gen_free_list_allocated;
+        size_t end_seg_allocated = generation_end_seg_allocated (older_gen) - r_older_gen_end_seg_allocated;
+        size_t condemned_allocated = generation_condemned_allocated (older_gen) - r_older_gen_condemned_allocated;
 
         dprintf (1, ("older gen's free alloc: %Id->%Id, seg alloc: %Id->%Id, condemned alloc: %Id->%Id",
                     r_older_gen_free_list_allocated, generation_free_list_allocated (older_gen),
@@ -21659,30 +21745,27 @@ void gc_heap::plan_phase (int condemned_gen_number)
                     r_older_gen_condemned_allocated, generation_condemned_allocated (older_gen)));
 
         dprintf (1, ("this GC did %Id free list alloc(%Id bytes free space rejected), %Id seg alloc and %Id condemned alloc, gen1 condemned alloc is %Id", 
-            free_allocated,
-            rejected_free_space,
-            (generation_end_seg_allocated (older_gen) - r_older_gen_end_seg_allocated),
-            (generation_condemned_allocated (older_gen) - r_older_gen_condemned_allocated),
-             generation_condemned_allocated (generation_of (settings.condemned_generation))));
+            free_list_allocated, rejected_free_space, end_seg_allocated,
+            condemned_allocated, generation_condemned_allocated (generation_of (settings.condemned_generation))));
 
-        float running_free_list_efficiency = 0;
-        if ((free_allocated + rejected_free_space) != 0)
-        {
-            running_free_list_efficiency = (float) (free_allocated) / (float)(free_allocated + rejected_free_space);
-        }
+        maxgen_size_increase* maxgen_size_info = &(gc_data_per_heap.maxgen_size_info);
+        maxgen_size_info->free_list_allocated = free_list_allocated;
+        maxgen_size_info->free_list_rejected = rejected_free_space;
+        maxgen_size_info->end_seg_allocated = end_seg_allocated;
+        maxgen_size_info->condemned_allocated = condemned_allocated;
+        maxgen_size_info->pinned_allocated = maxgen_pinned_compact_before_advance;
+        maxgen_size_info->pinned_allocated_advance = generation_pinned_allocation_compact_size (generation_of (max_generation)) - maxgen_pinned_compact_before_advance;
 
-        float free_list_efficiency = 0;
-        if ((generation_free_list_allocated (older_gen) + generation_free_obj_space (older_gen)) != 0)
-        {
-            free_list_efficiency =
-            (float) (generation_free_list_allocated (older_gen)) / (float)(generation_free_list_allocated (older_gen) + generation_free_obj_space (older_gen));
-        }
+#ifdef FREE_USAGE_STATS
+        int free_list_efficiency = 0;
+        if ((free_list_allocated + rejected_free_space) != 0)
+            free_list_efficiency = (int)(((float) (free_list_allocated) / (float)(free_list_allocated + rejected_free_space)) * (float)100);
+
+        int running_free_list_efficiency = (int)(generation_allocator_efficiency(older_gen)*100);
 
-        dprintf (1, ("gen%d running free list alloc effi: %d%%(%d%%), current effi: %d%%",
+        dprintf (1, ("gen%d free list alloc effi: %d%%, current effi: %d%%",
                     older_gen->gen_num,
-                    (int)(running_free_list_efficiency*100), 
-                    (int)(free_list_efficiency*100),
-                    (int)(generation_allocator_efficiency(older_gen)*100)));
+                    free_list_efficiency, running_free_list_efficiency));
 
         dprintf (1, ("gen2 free list change"));
         for (int j = 0; j < NUM_GEN_POWER2; j++)
@@ -21694,8 +21777,8 @@ void gc_heap::plan_phase (int condemned_gen_number)
                 (SSIZE_T)(r_older_gen_free_space[j] - older_gen->gen_free_spaces[j]),
                 (generation_of(max_generation - 1))->gen_plugs[j]));
         }
-    }
 #endif //FREE_USAGE_STATS
+    }
 
     size_t fragmentation =
         generation_fragmentation (generation_of (condemned_gen_number),
@@ -29000,20 +29083,13 @@ size_t gc_heap::desired_new_allocation (dynamic_data* dd,
     if (dd_begin_data_size (dd) == 0)
     {
         size_t new_allocation = dd_default_new_allocation (dd);
-        current_gc_data_per_heap->gen_data[gen_number].new_allocation = new_allocation;
-        if ((gen_number == 0) && (pass == 1))
-        {
-            current_gc_data_per_heap->gen_data[max_generation+2].new_allocation = new_allocation;
-        }
-        
+        current_gc_data_per_heap->gen_data[gen_number].new_allocation = new_allocation;        
         return new_allocation;
     }
     else
     {
         float     cst;
         size_t    previous_desired_allocation = dd_desired_allocation (dd);
-        //ptrdiff_t allocation = (previous_desired_allocation - dd_gc_new_allocation (dd));
-        ptrdiff_t allocation = (previous_desired_allocation - dd_new_allocation (dd));
         size_t    current_size = dd_current_size (dd);
         float     max_limit = dd_max_limit (dd);
         float     limit = dd_limit (dd);
@@ -29128,23 +29204,18 @@ size_t gc_heap::desired_new_allocation (dynamic_data* dd,
         size_t new_allocation_ret = 
             Align (new_allocation, get_alignment_constant (!(gen_number == (max_generation+1))));
         int gen_data_index = gen_number;
-        if ((gen_number == 0) && (pass == 1))
-        {
-            gen_data_index = max_generation+2;
-        }
         gc_generation_data* gen_data = &(current_gc_data_per_heap->gen_data[gen_data_index]);
-        gen_data->surv = (size_t)(cst*100);
         gen_data->new_allocation = new_allocation_ret;
 
         dd_surv (dd) = cst;
 
 #ifdef SIMPLE_DPRINTF
         dprintf (1, ("h%d g%d surv: %Id current: %Id alloc: %Id (%d%%) f: %d%% new-size: %Id new-alloc: %Id",
-                     heap_number, gen_number, out, current_size, allocation,
+                     heap_number, gen_number, out, current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)),
                      (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation));
 #else
         dprintf (1,("gen: %d in: %Id out: %Id ", gen_number, generation_allocation_size (generation_of (gen_number)), out));
-        dprintf (1,("current: %Id alloc: %Id ", current_size, allocation));
+        dprintf (1,("current: %Id alloc: %Id ", current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd))));
         dprintf (1,(" surv: %d%% f: %d%% new-size: %Id new-alloc: %Id",
                     (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation));
 #endif //SIMPLE_DPRINTF
@@ -29251,6 +29322,10 @@ size_t  gc_heap::compute_in (int gen_number)
 
     dd_gc_new_allocation (dd) -= in;
 
+    gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();
+    gc_generation_data* gen_data = &(current_gc_data_per_heap->gen_data[gen_number]);
+    gen_data->in = in;
+
     generation_allocation_size (generation_of (gen_number)) = 0;
     return in;
 }
@@ -29354,7 +29429,6 @@ void gc_heap::compute_new_dynamic_data (int gen_number)
     gen_data->size_after = total_gen_size;
     gen_data->free_list_space_after = generation_free_list_space (gen);
     gen_data->free_obj_space_after = generation_free_obj_space (gen);
-    gen_data->in = in;
 
     if ((settings.pause_mode == pause_low_latency) && (gen_number <= 1))
     {
@@ -29376,7 +29450,6 @@ void gc_heap::compute_new_dynamic_data (int gen_number)
             dprintf (2, ("gen: %d final promoted: %Id", gen_number, final_promoted));
             dd_freach_previous_promotion (dd) = final_promoted;
             size_t lower_bound = desired_new_allocation  (dd, out-final_promoted, gen_number, 0);
-            gen_data->out = out - final_promoted;
 
             if (settings.condemned_generation == 0)
             {
@@ -29385,9 +29458,6 @@ void gc_heap::compute_new_dynamic_data (int gen_number)
             }
             else
             {
-                current_gc_data_per_heap->gen_data[max_generation+2] = *gen_data;
-                current_gc_data_per_heap->gen_data[max_generation+2].out = out;
-
                 size_t higher_bound = desired_new_allocation (dd, out, gen_number, 1);
 
                 // <TODO>This assert was causing AppDomains\unload\test1n\test1nrun.bat to fail</TODO>
@@ -29412,11 +29482,13 @@ void gc_heap::compute_new_dynamic_data (int gen_number)
         }
         else
         {
-            gen_data->out = out;
             dd_desired_allocation (dd) = desired_new_allocation (dd, out, gen_number, 0);
         }
     }
 
+    gen_data->pinned_surv = dd_pinned_survived_size (dd);
+    gen_data->npinned_surv = dd_survived_size (dd) - dd_pinned_survived_size (dd);
+
     dd_gc_new_allocation (dd) = dd_desired_allocation (dd);
     //update counter
     dd_promoted_size (dd) = out;
@@ -29438,8 +29510,7 @@ void gc_heap::compute_new_dynamic_data (int gen_number)
         gen_data->size_after = total_gen_size;
         gen_data->free_list_space_after = generation_free_list_space (large_object_generation);
         gen_data->free_obj_space_after = generation_free_obj_space (large_object_generation);
-        gen_data->in = in;
-        gen_data->out = out;
+        gen_data->npinned_surv = out;
 #ifdef BACKGROUND_GC
         end_loh_size = total_gen_size;
 #endif //BACKGROUND_GC
@@ -29529,7 +29600,7 @@ void gc_heap::decommit_ephemeral_segment_pages()
     decommit_heap_segment_pages (ephemeral_heap_segment, slack_space);    
 
     gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();
-    current_gc_data_per_heap->extra_gen0_committed = (ULONGLONG)(heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment));
+    current_gc_data_per_heap->extra_gen0_committed = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment);
 }
 
 size_t gc_heap::new_allocation_limit (size_t size, size_t free_size, int gen_number)
@@ -30523,7 +30594,8 @@ void gc_heap::background_ephemeral_sweep()
     {
         generation* gen_to_reset = generation_of (i);
         assert (generation_free_list_space (gen_to_reset) == 0);
-        assert (generation_free_obj_space (gen_to_reset) == 0);
+        // We can only assert that free_list_space is 0, not free_obj_space, as the allocator
+        // could have added something there.
     }
 
     for (int i = (max_generation - 1); i >= 0; i--)
@@ -34282,7 +34354,7 @@ GCHeap::GarbageCollectTry (int generation, BOOL low_memory_p, int mode)
 
     gc_reason reason = reason_empty;
     
-    if (low_memory_p ) 
+    if (low_memory_p) 
     {
         if (mode & collection_blocking)
             reason = reason_lowmemory_blocking;
@@ -34494,8 +34566,12 @@ GCHeap::GarbageCollectGeneration (unsigned int gen, gc_reason reason)
 #endif //TRACE_GC
 
         gc_heap::g_low_memory_status = (reason == reason_lowmemory) || 
-                                        (reason == reason_lowmemory_blocking) ||
-                                        g_bLowMemoryFromHost;
+                                       (reason == reason_lowmemory_blocking) ||
+                                       g_bLowMemoryFromHost;
+
+        if (g_bLowMemoryFromHost)
+            reason = reason_lowmemory_host;
+
         gc_trigger_reason = reason;
 
 #ifdef MULTIPLE_HEAPS
diff --git a/src/gc/gcimpl.h b/src/gc/gcimpl.h
index 3269abc82d..86de9cac1a 100644
--- a/src/gc/gcimpl.h
+++ b/src/gc/gcimpl.h
@@ -55,6 +55,7 @@ enum gc_reason
     reason_gcstress = 8,        // this turns into reason_induced & gc_mechanisms.stress_induced = true
     reason_lowmemory_blocking = 9,
     reason_induced_compacting = 10,
+    reason_lowmemory_host = 11,
     reason_max
 };
 
diff --git a/src/gc/gcpriv.h b/src/gc/gcpriv.h
index 9da73bc6f7..9736cecdab 100644
--- a/src/gc/gcpriv.h
+++ b/src/gc/gcpriv.h
@@ -582,9 +582,7 @@ public:
     BOOL stress_induced;
 #endif // STRESS_HEAP
 
-#ifdef _WIN64
     DWORD entry_memory_load;
-#endif //_WIN64
 
     void init_mechanisms(); //for each GC
     void first_init(); // for the life of the EE
@@ -728,13 +726,17 @@ class alloc_list
 {
     BYTE* head;
     BYTE* tail;
+    size_t damage_count;
+
 public:
     BYTE*& alloc_list_head () { return head;}
     BYTE*& alloc_list_tail () { return tail;}
+    size_t& alloc_list_damage_count(){ return damage_count; }
     alloc_list()
     {
         head = 0; 
         tail = 0; 
+        damage_count = 0;
     }
 };
 
@@ -746,6 +748,7 @@ class allocator
     alloc_list first_bucket;
     alloc_list* buckets;
     alloc_list& alloc_list_of (unsigned int bn);
+    size_t& alloc_list_damage_count_of (unsigned int bn);
 
 public:
     allocator (unsigned int num_b, size_t fbs, alloc_list* b);
@@ -1212,6 +1215,9 @@ public:
     void verify_heap (BOOL begin_gc_p);
 #endif //VERIFY_HEAP
 
+    PER_HEAP_ISOLATED
+    void fire_per_heap_hist_event (gc_history_per_heap* current_gc_data_per_heap, int heap_num);
+
     PER_HEAP_ISOLATED
     void fire_pevents();
 
@@ -1815,7 +1821,7 @@ protected:
     PER_HEAP
     void adjust_limit_clr (BYTE* start, size_t limit_size,
                            alloc_context* acontext, heap_segment* seg,
-                           int align_const);
+                           int align_const, int gen_number);
     PER_HEAP
     void  leave_allocation_segment (generation* gen);
 
@@ -3422,6 +3428,9 @@ protected:
     PER_HEAP
     gc_history_per_heap gc_data_per_heap;
 
+    PER_HEAP
+    size_t maxgen_pinned_compact_before_advance;
+
     // dynamic tuning.
     PER_HEAP
     BOOL dt_low_ephemeral_space_p (gc_tuning_point tp);
diff --git a/src/gc/gcrecord.h b/src/gc/gcrecord.h
index 1d2a2cdeef..2dbf8e8842 100644
--- a/src/gc/gcrecord.h
+++ b/src/gc/gcrecord.h
@@ -61,12 +61,13 @@ enum gc_condemn_reason_condition
     gen_gen2_too_small = 13,
     gen_induced_noforce_p = 14,
     gen_before_bgc = 15,
-    gcrc_max = 16
+    gen_almost_max_alloc = 16,
+    gcrc_max = 17
 };
 
 #ifdef DT_LOG
 static char* record_condemn_reasons_gen_header = "[cg]i|f|a|t|";
-static char* record_condemn_reasons_condition_header = "[cc]i|e|h|v|l|l|e|m|m|m|m|g|o|s|n|b|";
+static char* record_condemn_reasons_condition_header = "[cc]i|e|h|v|l|l|e|m|m|m|m|g|o|s|n|b|a|";
 static char char_gen_number[4] = {'0', '1', '2', '3'};
 #endif //DT_LOG
 
@@ -135,6 +136,16 @@ public:
         return value;
     }
 
+    DWORD get_reasons0()
+    {
+        return condemn_reasons_gen;
+    }
+
+    DWORD get_reasons1()
+    {
+        return condemn_reasons_condition;
+    }
+
 #ifdef DT_LOG
     char get_gen_char (DWORD value)
     {
@@ -149,11 +160,9 @@ public:
     void print (int heap_num);
 };
 
-// *******IMPORTANT*******
-// The data members in this class are specifically
-// arranged in decending order by their sizes to guarantee no
-// padding - this is important for recording the ETW event 
-// 'cause ETW stuff will not apply padding.
+// Right now these are all size_t's, but if you add a type that requires
+// padding, you should add a pragma pack here, since this struct is fired
+// directly as the payload of an ETW event and ETW will not apply padding.
 struct gc_generation_data
 {
     // data recorded at the beginning of a GC
@@ -166,16 +175,24 @@ struct gc_generation_data
     size_t free_list_space_after;
     size_t free_obj_space_after;
     size_t in;
-    size_t out;
-
-    // The following data is calculated in 
-    // desired_new_allocation.
+    size_t pinned_surv;
+    size_t npinned_surv;
     size_t new_allocation;
-    size_t surv;
 
     void print (int heap_num, int gen_num);
 };
 
+struct maxgen_size_increase
+{
+    size_t free_list_allocated;
+    size_t free_list_rejected;
+    size_t end_seg_allocated;
+    size_t condemned_allocated;
+    size_t pinned_allocated;
+    size_t pinned_allocated_advance;
+    DWORD running_free_list_efficiency;
+};
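A minimal sketch of the packing concern the new comment raises, with a hypothetical struct and field names that are not part of this change: if a field narrower than size_t were ever added to one of these ETW payload structs, an explicit pack pragma would keep the in-memory layout byte-for-byte identical to what the event consumer decodes.

    #include <cstddef>
    #include <cstdint>

    // Hypothetical payload that mixes a 4-byte field with size_t fields.
    // Without the pragma, the compiler may insert padding after 'flags'
    // on 64-bit targets, and the ETW consumer would mis-decode the bytes.
    #pragma pack(push, 1)
    struct example_etw_payload
    {
        size_t   size_before;
        uint32_t flags;          // narrower than size_t -> needs packing
        size_t   size_after;
    };
    #pragma pack(pop)

    static_assert(sizeof(example_etw_payload) ==
                  2 * sizeof(size_t) + sizeof(uint32_t),
                  "payload must contain no padding");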
+
 // The following indicates various mechanisms and one value
 // related to each one. Each value has its corresponding string
 // representation so if you change the enum's, make sure you
@@ -249,31 +266,19 @@ static gc_mechanism_descr gc_mechanisms_descr[max_mechanism_per_heap] =
     {"expanded heap ", str_heap_expand_mechanisms},
     {"compacted because of ", str_compact_reasons}
 };
-
 #endif //DT_LOG
 
 int index_of_set_bit (size_t power2);
 
 #define mechanism_mask (1 << (sizeof (DWORD) * 8 - 1))
 // interesting per heap data we want to record for each GC.
-// *******IMPORTANT*******
-// The data members in this class are specifically
-// arranged in decending order by their sizes to guarantee no
-// padding - this is important for recording the ETW event 
-// 'cause ETW stuff will not apply padding.
 class gc_history_per_heap
 {
 public:
-    // The reason we use max_generation+3 is because when we are 
-    // condemning 1+, we calculate generation 0 data twice and we'll
-    // store data from the 2nd pass in gen_data[max_generation+2].
-    // For generations > condemned_gen, the values are all 0.
-    gc_generation_data gen_data[max_generation+3]; 
+    gc_generation_data gen_data[max_generation+2]; 
+    maxgen_size_increase maxgen_size_info;
     gen_to_condemn_tuning gen_to_condemn_reasons;
 
-    // if we got the memory pressure in generation_to_condemn, this 
-    // will record that value; otherwise it's 0.
-    DWORD mem_pressure;
     // The mechanisms data is compacted in the following way:
     // most significant bit indicates if we did the operation.
     // the rest of the bits indicate the reason
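A tiny sketch of the encoding this comment describes, using the mechanism_mask definition above; the encode/decode helpers and the DWORD stand-in are illustrative only, not part of this change.

    #include <cstdint>

    typedef uint32_t DWORD;   // stand-in for the Windows typedef, sketch only

    const DWORD mechanism_mask = (DWORD)1 << (sizeof (DWORD) * 8 - 1);

    // Encode: record that the mechanism fired, plus the reason it fired.
    inline DWORD encode_mechanism (DWORD reason)
    {
        return mechanism_mask | reason;
    }

    // Decode: returns true if the mechanism fired and extracts the reason.
    inline bool decode_mechanism (DWORD value, DWORD* reason)
    {
        if ((value & mechanism_mask) == 0)
            return false;

        *reason = value & ~mechanism_mask;
        return true;
    }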
@@ -286,7 +291,7 @@ public:
 
     DWORD heap_index; 
 
-    ULONGLONG extra_gen0_committed;
+    size_t extra_gen0_committed;
 
     void set_mechanism (gc_mechanism_per_heap mechanism_per_heap, DWORD value)
     {
@@ -315,45 +320,9 @@ public:
         return -1;
     }
 
-    void print (int heap_num);
+    void print();
 };
 
-#if defined(FEATURE_EVENT_TRACE) && !defined(FEATURE_REDHAWK)
-
-#if !defined(ETW_INLINE)
-#define ETW_INLINE DECLSPEC_NOINLINE __inline
-#endif
-
-ETW_INLINE
-ULONG 
-Etw_GCDataPerHeapSpecial(
-	__in PCEVENT_DESCRIPTOR Descriptor, 
-	__in LPCGUID EventGuid, 
-	__in gc_history_per_heap gc_data_per_heap,
-	__in ULONG datasize,
-	__in UINT8 ClrInstanceId)
-{
-    REGHANDLE RegHandle = Microsoft_Windows_DotNETRuntimePrivateHandle;
-#define ARGUMENT_COUNT_GCDataPerHeapTemplate 2
-    ULONG Error = ERROR_SUCCESS;
-typedef struct _MCGEN_TRACE_BUFFER {
-    EVENT_TRACE_HEADER Header;
-    EVENT_DATA_DESCRIPTOR EventData[ARGUMENT_COUNT_GCDataPerHeapTemplate];
-} MCGEN_TRACE_BUFFER;
-
-    MCGEN_TRACE_BUFFER TraceBuf;
-    PEVENT_DATA_DESCRIPTOR EventData = TraceBuf.EventData;
-
-    EventDataDescCreate(&EventData[0], &gc_data_per_heap, datasize);
-
-    EventDataDescCreate(&EventData[1], &ClrInstanceId, sizeof(ClrInstanceId));
-
-    return EventWrite(RegHandle, Descriptor, ARGUMENT_COUNT_GCDataPerHeapTemplate, EventData);
-}
-
-#undef TraceEvent
-#endif // FEATURE_EVENT_TRACE && !FEATURE_REDHAWK
-
 // we store up to 32 boolean settings.
 enum gc_global_mechanism_p
 {
@@ -362,14 +331,10 @@ enum gc_global_mechanism_p
     global_promotion,
     global_demotion,
     global_card_bundles,
+    global_elevation,
     max_global_mechanism
 };
 
-// *******IMPORTANT*******
-// The data members in this class are specifically
-// arranged in decending order by their sizes to guarantee no
-// padding - this is important for recording the ETW event 
-// 'cause ETW stuff will not apply padding.
 struct gc_history_global
 {
     // We may apply other factors after we calculated gen0 budget in
@@ -380,6 +345,8 @@ struct gc_history_global
     int condemned_generation;
     int gen0_reduction_count;
     gc_reason reason;
+    int pause_mode;
+    DWORD mem_pressure;
     DWORD global_mechanims_p;
 
     void set_mechanism_p (gc_global_mechanism_p mechanism)
diff --git a/src/gc/objecthandle.cpp b/src/gc/objecthandle.cpp
index b93d90d539..8c309056b1 100644
--- a/src/gc/objecthandle.cpp
+++ b/src/gc/objecthandle.cpp
@@ -146,9 +146,9 @@ void CALLBACK UpdateDependentHandle(_UNCHECKED_OBJECTREF *pObjRef, LPARAM *pExtr
     Object **pPrimaryRef = (Object **)pObjRef;
     Object **pSecondaryRef = (Object **)pExtraInfo;
   
-    LOG((LF_GC|LF_ENC, LL_INFO10000, LOG_HANDLE_OBJECT_CLASS("Querying for new location of ", 
+    LOG((LF_GC|LF_ENC, LL_INFO10000, LOG_HANDLE_OBJECT("Querying for new location of ", 
             pPrimaryRef, "to ", *pPrimaryRef)));
-    LOG((LF_GC|LF_ENC, LL_INFO10000, LOG_HANDLE_OBJECT_CLASS(" and ", 
+    LOG((LF_GC|LF_ENC, LL_INFO10000, LOG_HANDLE_OBJECT(" and ", 
             pSecondaryRef, "to ", *pSecondaryRef)));
 
 #ifdef _DEBUG
@@ -371,7 +371,7 @@ void CALLBACK UpdatePointer(_UNCHECKED_OBJECTREF *pObjRef, LPARAM *pExtraInfo, L
 {
     LIMITED_METHOD_CONTRACT;
 
-    LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT_CLASS("Querying for new location of ", pObjRef, "to ", *pObjRef)));
+    LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT("Querying for new location of ", pObjRef, "to ", *pObjRef)));
 
     Object **ppRef = (Object **)pObjRef;
 
@@ -536,7 +536,7 @@ void CALLBACK UpdatePointerPinned(_UNCHECKED_OBJECTREF *pObjRef, LPARAM *pExtraI
     promote_func* callback = (promote_func*) lp2;
     callback(ppRef, (ScanContext *)lp1, GC_CALL_PINNED);
     
-    LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT_CLASS("Updating ", pObjRef, "to pinned ", *pObjRef)));
+    LOG((LF_GC, LL_INFO100000, LOG_HANDLE_OBJECT("Updating ", pObjRef, "to pinned ", *pObjRef)));
 }
 
 
diff --git a/src/inc/corprof.idl b/src/inc/corprof.idl
index bb96b03ef0..894a8850be 100644
--- a/src/inc/corprof.idl
+++ b/src/inc/corprof.idl
@@ -582,6 +582,7 @@ typedef enum
                                           COR_PRF_MONITOR_GC |
                                           COR_PRF_MONITOR_SUSPENDS |
                                           COR_PRF_MONITOR_CLASS_LOADS |
+                                          COR_PRF_MONITOR_EXCEPTIONS |
                                           COR_PRF_MONITOR_JIT_COMPILATION,
 
     // MONITOR_IMMUTABLE represents all flags that may only be set during initialization.
diff --git a/src/inc/eventtrace.h b/src/inc/eventtrace.h
index f773fdd1cc..a2daacec60 100644
--- a/src/inc/eventtrace.h
+++ b/src/inc/eventtrace.h
@@ -231,18 +231,32 @@ namespace ETW
             // These values are gotten from the gc_reason
             // in gcimpl.h
             typedef  enum _GC_REASON { 
-                GC_ALLOC_SOH = 0 , 
-                GC_INDUCED = 1 , 
+                GC_ALLOC_SOH = 0, 
+                GC_INDUCED = 1, 
                 GC_LOWMEMORY = 2,
                 GC_EMPTY = 3,
                 GC_ALLOC_LOH = 4,
                 GC_OOS_SOH = 5,
                 GC_OOS_LOH = 6,
-                GC_INDUCED_NOFORCE = 7
+                GC_INDUCED_NOFORCE = 7,
+                GC_GCSTRESS = 8,
+                GC_LOWMEMORY_BLOCKING = 9,
+                GC_INDUCED_COMPACTING = 10,
+                GC_LOWMEMORY_HOST = 11
             } GC_REASON;
             typedef  enum _GC_TYPE { 
-                GC_NGC = 0 , GC_BGC = 1 , GC_FGC = 2
+                GC_NGC = 0, 
+                GC_BGC = 1, 
+                GC_FGC = 2
             } GC_TYPE;
+            typedef  enum _GC_ROOT_KIND { 
+              GC_ROOT_STACK = 0,
+              GC_ROOT_FQ = 1,
+              GC_ROOT_HANDLES = 2,
+              GC_ROOT_OLDER = 3,
+              GC_ROOT_SIZEDREF = 4,
+              GC_ROOT_OVERFLOW = 5
+            } GC_ROOT_KIND;
             struct {
                 ULONG Count;
                 ULONG Depth;
diff --git a/src/inc/eventtracebase.h b/src/inc/eventtracebase.h
index d21353a0f2..517125f49b 100644
--- a/src/inc/eventtracebase.h
+++ b/src/inc/eventtracebase.h
@@ -450,6 +450,14 @@ namespace ETW
         friend class ETW::EnumerationLog;
 #ifdef FEATURE_EVENT_TRACE
         static VOID SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAllocator *pLoaderAllocatorFilter, DWORD dwEventOptions);
+        static VOID SendEventsForJitMethodsHelper(BaseDomain *pDomainFilter,
+            LoaderAllocator *pLoaderAllocatorFilter,
+            DWORD dwEventOptions,
+            BOOL fLoadOrDCStart,
+            BOOL fUnloadOrDCEnd,
+            BOOL fSendMethodEvent,
+            BOOL fSendILToNativeMapEvent,
+            BOOL fGetReJitIDs);
         static VOID SendEventsForNgenMethods(Module *pModule, DWORD dwEventOptions);
         static VOID SendMethodJitStartEvent(MethodDesc *pMethodDesc, SString *namespaceOrClassName=NULL, SString *methodName=NULL, SString *methodSignature=NULL);
         static VOID SendMethodILToNativeMapEvent(MethodDesc * pMethodDesc, DWORD dwEventOptions, ReJITID rejitID);
@@ -874,15 +882,9 @@ McGenEventProviderEnabled(
 #define ETW_PROVIDER_ENABLED(ProviderSymbol)                 \
         ProviderSymbol##_Context.IsEnabled
 
-#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrInstanceId)\
-        MCGEN_ENABLE_CHECK(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, GCPerHeapHistory) ?\
-        Etw_GCDataPerHeapSpecial(&GCPerHeapHistory, &GarbageCollectionPrivateId, DataPerHeap, DataSize, ClrInstanceId)\
-        : ERROR_SUCCESS\
-
 #else
 
 #define ETW_PROVIDER_ENABLED(ProviderSymbol) TRUE
-#define FireEtwGCPerHeapHistorySpecial(DataPerHeap, DataSize, ClrInstanceId) 0
 
 #endif // FEATURE_EVENT_TRACE
 
diff --git a/src/inc/fxretarget.h b/src/inc/fxretarget.h
index 755cba67da..ee2f751f10 100644
--- a/src/inc/fxretarget.h
+++ b/src/inc/fxretarget.h
@@ -295,6 +295,7 @@ const FrameworkConfig g_arFxPolicy[] =
     {L"System.Net.NetworkInformation", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported},
     {L"System.Net.Primitives", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported},
     {L"System.Net.Requests", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported},
+    {L"System.Net.WebHeaderCollection", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported},
     {L"System.ObjectModel", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported},
     {L"System.Reflection", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported},
     {L"System.Reflection.Emit", MICROSOFT_PUBLICKEY_STR_L, VER_ASSEMBLYVERSION_STR_L, FxPolicyHelper::AppXBinder_Supported},
diff --git a/src/inc/stdmacros.h b/src/inc/stdmacros.h
index 578b19b16d..b410bdda25 100644
--- a/src/inc/stdmacros.h
+++ b/src/inc/stdmacros.h
@@ -294,7 +294,6 @@ inline ULONG RoundUpToPower2(ULONG x)
 #define DBG_IPTR_NAME(iptr)            \
         (iptr) ? "interior" : "base"
 
-
 #define LOG_HANDLE_OBJECT_CLASS(str1, hnd, str2, obj)    \
         str1 FMT_HANDLE str2 FMT_OBJECT FMT_CLASS "\n",  \
         DBG_ADDR(hnd), DBG_ADDR(obj), DBG_CLASS_NAME_OBJ(obj)
@@ -308,6 +307,15 @@ inline ULONG RoundUpToPower2(ULONG x)
         DBG_PIN_NAME(pin), DBG_IPTR_NAME(iptr),          \
         DBG_ADDR(obj), DBG_CLASS_NAME_IPTR(obj,iptr)
 
+#define LOG_HANDLE_OBJECT(str1, hnd, str2, obj)          \
+        str1 FMT_HANDLE str2 FMT_OBJECT "\n",            \
+        DBG_ADDR(hnd), DBG_ADDR(obj)
+
+#define LOG_PIPTR_OBJECT(obj, pin, iptr)                 \
+        FMT_PIPTR FMT_ADDR "\n",                         \
+        DBG_PIN_NAME(pin), DBG_IPTR_NAME(iptr),          \
+        DBG_ADDR(obj)
+
 #define UNIQUE_LABEL_DEF(a,x)           a##x
 #define UNIQUE_LABEL_DEF_X(a,x)         UNIQUE_LABEL_DEF(a,x)
 #ifdef _MSC_VER
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp
index 8181957f35..409fc64542 100644
--- a/src/jit/assertionprop.cpp
+++ b/src/jit/assertionprop.cpp
@@ -459,8 +459,8 @@ void                Compiler::optAddCopies()
             tree->gtOp.gtOp1  = newAsgn;
             tree->gtOp.gtOp2  = copyAsgn;
 
-            tree->gtFlags    |= ( newAsgn->gtFlags & GTF_GLOB_EFFECT);
-            tree->gtFlags    |= (copyAsgn->gtFlags & GTF_GLOB_EFFECT);
+            tree->gtFlags    |= ( newAsgn->gtFlags & GTF_ALL_EFFECT);
+            tree->gtFlags    |= (copyAsgn->gtFlags & GTF_ALL_EFFECT);
         }
 
 #ifdef DEBUG
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 0762e61da4..70bd108d5b 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -5419,9 +5419,11 @@ void CodeGen::genAllocLclFrame(unsigned  frameSize,
 
 #else // !CPU_LOAD_STORE_ARCH
 
-        // Code size for each instruction. We need this because the 
+        // Code size for each instruction. We need this because the
         // backward branch is hard-coded with the number of bytes to branch.
-
+        // The encoding differs based on the architecture and what register is
+        // used (namely, using RAX has a smaller encoding).
+        //
         // loop:
         // For x86
         //      test [esp + eax], eax       3
@@ -5440,23 +5442,27 @@ void CodeGen::genAllocLclFrame(unsigned  frameSize,
         //      sub rbp, 0x1000             7
         //      cmp rbp, -frameSize         7
         //      jge loop                    2
+
         getEmitter()->emitIns_R_ARR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0);
         inst_RV_IV(INS_sub,  initReg, CORINFO_PAGE_SIZE, EA_PTRSIZE);
         inst_RV_IV(INS_cmp,  initReg, -((ssize_t)frameSize), EA_PTRSIZE);
-        int extraBytesForBackJump = 0;
+
+        int bytesForBackwardJump;
 #ifdef _TARGET_AMD64_
-        extraBytesForBackJump = ((initReg == REG_EAX) ? 3 : 5);
-#endif // _TARGET_AMD64_
-        inst_IV(INS_jge, -15 - extraBytesForBackJump);   // Branch backwards to Start of Loop
+        assert((initReg == REG_EAX) || (initReg == REG_EBP));   // We use RBP as initReg for EH funclets.
+        bytesForBackwardJump = ((initReg == REG_EAX) ? -18 : -20);
+#else // !_TARGET_AMD64_
+        assert(initReg == REG_EAX);
+        bytesForBackwardJump = -15;
+#endif // !_TARGET_AMD64_
+
+        inst_IV(INS_jge, bytesForBackwardJump);   // Branch backwards to start of loop
 
 #endif // !CPU_LOAD_STORE_ARCH
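A sanity check on the displacement constants above, written as a sketch; the individual instruction sizes are assumptions inferred from the comment and from the -15/-18/-20 values rather than verified encodings. The jge displacement is counted from the end of the jge itself, so it is the negative of the whole loop's size, including the 2-byte jge.

    // Assumed sizes: test(4)/sub(7)/cmp(7) with RBP on AMD64, one byte less on
    // sub and cmp when RAX's short immediate forms apply, test(3)/sub(5)/cmp(5) on x86.
    static_assert(-(4 + 7 + 7 + 2) == -20, "AMD64, initReg == REG_EBP");
    static_assert(-(4 + 6 + 6 + 2) == -18, "AMD64, initReg == REG_EAX");
    static_assert(-(3 + 5 + 5 + 2) == -15, "x86,   initReg == REG_EAX");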
 
         *pInitRegZeroed = false;  // The initReg does not contain zero
 
 #ifdef _TARGET_XARCH_
-        // The backward branch above depends upon using EAX (and for Amd64 funclets EBP)
-        assert((initReg == REG_EAX) AMD64_ONLY(|| (initReg == REG_EBP)));
-
         if (pushedStubParam)
         {
             // pop eax
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index b6c139e678..63efcf0ffd 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -3550,14 +3550,28 @@ void CodeGen::genCodeForCpObj(GenTreeCpObj* cpObjNode)
     // src = RSI and dst = RDI. 
     // Either these registers must not contain lclVars, or they must be dying or marked for spill.
     // This is because these registers are incremented as we go through the struct.
-    if (srcAddr->gtRegNum == REG_RSI)
-    {
-        assert(!genIsRegCandidateLocal(srcAddr) || (srcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) != 0);
-    }
-    if (dstAddr->gtRegNum == REG_RDI)
-    {
-        assert(!genIsRegCandidateLocal(dstAddr) || (dstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) != 0);
-    }
+    GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
+    GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
+    unsigned srcLclVarNum = BAD_VAR_NUM;
+    unsigned dstLclVarNum = BAD_VAR_NUM;
+    bool isSrcAddrLiveOut = false;
+    bool isDstAddrLiveOut = false;
+    if (genIsRegCandidateLocal(actualSrcAddr))
+    {
+        srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
+        isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+    }
+    if (genIsRegCandidateLocal(actualDstAddr))
+    {
+        dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
+        isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+    }
+    assert((actualSrcAddr->gtRegNum != REG_RSI) ||
+           !isSrcAddrLiveOut                ||
+           ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
+    assert((actualDstAddr->gtRegNum != REG_RDI) ||
+           !isDstAddrLiveOut                ||
+           ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
 #endif // DEBUG
 
     // Consume these registers.
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 8dc070618e..9a624d830e 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -1968,14 +1968,6 @@ public:
     // is such an object pointer.
     bool                    gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd);
 
-
-    // Assignment trees which contain an unmanged PInvoke call need to have a simple op1
-    // in order to prevent us from have a TYP_BYREF live accross a call to a PInvoke
-    // If necessary this method will morph such an assignment to honor this restriction
-    //
-    GenTreePtr              gtCheckReorderAssignmentForUnmanagedCall(GenTreePtr tree);
-
-
     //-------------------------------------------------------------------------
 
     GenTreePtr              gtFoldExpr       (GenTreePtr    tree);
@@ -3873,6 +3865,14 @@ public:
         return m_switchDescMap;
     }
 
+    // Invalidate the map of unique switch block successors. In particular, since the hash key of the map
+    // depends on block numbers, we must invalidate the map when the blocks are renumbered, to ensure that
+    // we don't accidentally look up and return the wrong switch data.
+    void InvalidateUniqueSwitchSuccMap()
+    {
+        m_switchDescMap = nullptr;
+    }
+
     // Requires "switchBlock" to be a block that ends in a switch.  Returns
     // the corresponding SwitchUniqueSuccSet.
     SwitchUniqueSuccSet GetDescriptorForSwitch(BasicBlock* switchBlk);
@@ -5048,7 +5048,7 @@ protected :
     void                optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to);
 
     // Updates the successors of "blk": if "blk2" is a successor of "blk", and there is a mapping for "blk2->blk3" in "redirectMap",
-    // change "blk" so that "blk3" is this successor.
+    // change "blk" so that "blk3" is this successor. Note that the predecessor lists are not updated.
     void                optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap);
 
     // Marks the containsCall information to "lnum" and any parent loops.
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 54472600f8..12987cff62 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -2316,16 +2316,28 @@ void Compiler::fgDfsInvPostOrder()
     // mark in this step.
     BlockSet_ValRet_T startNodes = fgDomFindStartNodes();
 
-    // Make sure fgFirstBB is still there, even if it participates in a loop.
-    // Review: it might be better to do this:
+    // Make sure fgEnterBlks are still there in startNodes, even if they participate in a loop (i.e., there is
+    // an incoming edge into the block).
+    assert(fgEnterBlksSetValid);
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+    //
     //    BlockSetOps::UnionD(this, startNodes, fgEnterBlks);
-    // instead, but this causes problems on ARM, because we for BBJ_CALLFINALLY/BBJ_ALWAYS pairs, we add the BBJ_ALWAYS
+    //
+    // This causes problems on ARM because, for BBJ_CALLFINALLY/BBJ_ALWAYS pairs, we add the BBJ_ALWAYS
     // to the enter blocks set to prevent flow graph optimizations from removing it and creating retless call finallies
     // (BBF_RETLESS_CALL). This leads to an incorrect DFS ordering in some cases, because we start the recursive walk
     // from the BBJ_ALWAYS, which is reachable from other blocks. A better solution would be to change ARM to avoid
     // creating retless calls in a different way, not by adding BBJ_ALWAYS to fgEnterBlks.
+    //
+    // So, let us make sure at least fgFirstBB is still there, even if it participates in a loop.
     BlockSetOps::AddElemD(this, startNodes, 1);
     assert(fgFirstBB->bbNum == 1);
+#else
+    BlockSetOps::UnionD(this, startNodes, fgEnterBlks);
+#endif
+
+    assert(BlockSetOps::IsMember(this, startNodes, fgFirstBB->bbNum));
 
     // Call the recursive helper.
     unsigned postIndex = 1;
@@ -10510,7 +10522,7 @@ void                Compiler::fgRemoveBlock(BasicBlock*   block,
     // If we've cached any mappings from switch blocks to SwitchDesc's (which contain only the
     // *unique* successors of the switch block), invalidate that cache, since an entry in one of
     // the SwitchDescs might be removed.
-    m_switchDescMap = NULL;
+    InvalidateUniqueSwitchSuccMap();
 
     noway_assert((block == fgFirstBB) || (bPrev && (bPrev->bbNext == block)));
     noway_assert(!(block->bbFlags & BBF_DONT_REMOVE));
@@ -11113,6 +11125,9 @@ bool            Compiler::fgRenumberBlocks()
     if (renumbered || newMaxBBNum)
     {
         NewBasicBlockEpoch();
+
+        // The key in the unique switch successor map is dependent on the block number, so invalidate that cache.
+        InvalidateUniqueSwitchSuccMap();
     }
     else
     {
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index d37b3bf181..76c9f504a6 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -5881,6 +5881,17 @@ GenTreePtr          Compiler::gtCloneExpr(GenTree * tree,
             }
             break;
 
+        case GT_LEA:
+            {
+                GenTreeAddrMode* addrModeOp = tree->AsAddrMode();
+                copy = new(this, GT_LEA) GenTreeAddrMode(addrModeOp->TypeGet(),
+                                                         addrModeOp->Base(),
+                                                         addrModeOp->Index(),
+                                                         addrModeOp->gtScale,
+                                                         addrModeOp->gtOffset);
+            }
+            break;
+
 #ifdef FEATURE_SIMD
         case GT_SIMD:
             {
@@ -9764,32 +9775,41 @@ CHK_OVF:
          * or overflow - when dividing MIN by -1 */
 
         case GT_DIV:
-            if (!i2) return tree;
-            if (UINT32(i1) == 0x80000000 && i2 == -1)
+        case GT_MOD:
+        case GT_UDIV:
+        case GT_UMOD:
+            if (INT32(i2) == 0)
             {
-                /* In IL we have to throw an exception */
+                // Division by zero: 
+                // We have to evaluate this expression and throw an exception
                 return tree;
             }
-            i1 = INT32(i1) / INT32(i2); break;
-
-        case GT_MOD:
-            if (!i2) return tree;
-            if (UINT32(i1) == 0x80000000 && i2 == -1)
+            else if ((INT32(i2) == -1) &&
+                     (UINT32(i1) == 0x80000000))
             {
-                /* In IL we have to throw an exception */
+                // Overflow Division: 
+                // We have to evaluate this expression and throw an exception
                 return tree;
             }
-            i1 = INT32(i1) % INT32(i2); break;
 
-        case GT_UDIV:
-            if (!i2) return tree;
-            if (UINT32(i1) == 0x80000000 && i2 == -1) return tree;
-            i1 = UINT32(i1) / UINT32(i2); break;
-
-        case GT_UMOD:
-            if (!i2) return tree;
-            if (UINT32(i1) == 0x80000000 && i2 == -1) return tree;
-            i1 = UINT32(i1) % UINT32(i2); break;
+            if (tree->gtOper == GT_DIV)
+            {
+                i1 = INT32(i1) / INT32(i2);
+            }
+            else if (tree->gtOper == GT_MOD)
+            {
+                i1 = INT32(i1) % INT32(i2);
+            }
+            else if (tree->gtOper == GT_UDIV)
+            {
+                i1 = UINT32(i1) / UINT32(i2);
+            }
+            else 
+            {
+                assert(tree->gtOper == GT_UMOD);
+                i1 = UINT32(i1) % UINT32(i2);
+            }
+            break;
 
         default:
             return tree;
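The two guard conditions above can be seen in isolation in the sketch below (a hypothetical helper, not part of this change): folding a 32-bit signed division is only legal when the divisor is non-zero and the INT32_MIN / -1 case is excluded, because both cases must still raise a run-time exception.

    #include <cstdint>

    // Fold i1 / i2 at compile time when it is safe to do so.  Returns false
    // when the division must stay in the tree so that the run-time exception
    // (divide-by-zero or arithmetic overflow) is preserved.
    inline bool TryFoldSignedDiv (int32_t i1, int32_t i2, int32_t* result)
    {
        if (i2 == 0)
            return false;                      // division by zero

        if ((i2 == -1) && (i1 == INT32_MIN))
            return false;                      // INT32_MIN / -1 overflows

        *result = i1 / i2;
        return true;
    }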
@@ -11034,36 +11054,6 @@ bool            Compiler::gtHasCatchArg(GenTreePtr tree)
     return false;
 }
 
-/*****************************************************************************
- *
- *  Callback that checks for a tree that is a GT_CALL to an umanaged target (PInvoke)
- */
-
-static Compiler::fgWalkResult  gtFindUnmanagedCall(GenTreePtr *               pTree,
-                                                   Compiler::fgWalkData *  /* data */)
-{
-    // If the current node is not a GT_CALL then continue searching...
-    if ((*pTree)->OperGet() != GT_CALL) 
-        return Compiler::WALK_CONTINUE;
-
-    // If the current call node does not have the GTF_CALL_UNMANAGED flag set then continue searching...
-    if (((*pTree)->gtFlags & GTF_CALL_UNMANAGED) == 0)
-        return Compiler::WALK_CONTINUE;
-
-    // We found an unmanaged call site
-    return Compiler::WALK_ABORT;
-}
-
-/*****************************************************************************/
-bool            Compiler::gtHasUnmanagedCall(GenTreePtr tree)
-{
-    // Does the current subtree contain an unmanaged call?
-    if (fgWalkTreePre(&tree, gtFindUnmanagedCall) == WALK_ABORT)
-    {
-        return true;
-    }
-    return false;
-}
 
 //------------------------------------------------------------------------
 // gtHasCallOnStack:
@@ -11154,189 +11144,6 @@ void Compiler::gtCheckQuirkAddrExposedLclVar(GenTreePtr tree, GenTreeStack* pare
 #endif
 }
 
-//------------------------------------------------------------------------
-// gtCheckReorderAssignmentForUnmanagedCall: 
-//
-//   Assignment trees which contain an unmanged PInvoke call need to have a simple op1
-//   in order to prevent us from have a TYP_BYREF live accross a call to a PInvoke
-//   This is because we are not allowed to keep a GC pointer in a register accross a
-//   PInvoke call sit.  We cannot update them when/if a GC occurs during the PInvoke call.
-//
-// Arguments:
-//    tree      - An assignment or assignOp GenTree node, that has not yet been morphed
-//
-// Output:
-//    tree      - An unchanged tree 
-//                or a mutated tree when we need to evaluate the address for op1 into a temp
-//
-//   We will mutate the assignment tree  when op1 has a side-effect that might require us 
-//   to evaluate it before op2 and if the op2 tree contains an unmanaged call site
-//
-GenTreePtr      Compiler::gtCheckReorderAssignmentForUnmanagedCall(GenTreePtr tree)
-{
-    assert(tree->OperKind() & GTK_ASGOP);
-
-#if INLINE_NDIRECT
-    // Does this method have any unmanaged calls?
-    if (info.compCallUnmanaged != 0)
-    {
-        GenTreePtr op1     = tree->gtOp.gtOp1;
-        GenTreePtr op2     = tree->gtGetOp2();
-        var_types  asgTyp  = op1->TypeGet();
-
-        // Does op1 have a side-effect that causes us to evaluate it before op2?
-        // Or, does it contain a BYREF that must not be kept live across an unmanaged call?
-        if  (op1->gtFlags & GTF_ALL_EFFECT)
-        {
-            // Does op2 contain an unmanged call?
-            if (gtHasUnmanagedCall(op2))
-            {
-/*
-                 +---------+----------+ 
-           tree  |      GT_ASG        | 
-                 +---------+----------+
-                           | 
-                         /   \ 
-                       /       \ 
-                     /           \ 
-         +-----+-----+             +-----+-----+ 
-     op1 |  . . .    |         op2 |   . . .   |
-         +-----+-----+             +-----+-----+  
-         GTF_ALL_EFFECT            HasUnmanagedCall
-
-*/
-                // op1 could be a sequence of GT_COMMA nodes 
-                // if it is then we traverse down the op2 side 
-                // until we reach a non comma node
-                // and we set splice to the last GT_COMMA node that we visited
-                //
-                GenTreePtr splice = nullptr;
-                GenTreePtr op1Val = op1;
-                while (op1Val->gtOper == GT_COMMA)
-                {
-                    splice = op1;
-                    op1Val = op1->gtOp.gtOp2;
-                }
-
-                // Now op1Val is now the actual target of the assignment 
-                // it could be a GT_IND in which case we just remove the GT_IND
-                // otherwise we take its address by adding a GT_ADDR above it.
-
-                GenTreePtr op1Addr;
-                var_types  addrTyp;
-
-                if (op1Val->gtOper == GT_IND)
-                {
-                    op1Addr = op1Val->gtOp.gtOp1;
-                    addrTyp = op1Addr->TypeGet();
-
-                    // You cannot have a GT_IND on a TYP_REF
-                    assert(addrTyp != TYP_REF);
-                }
-                else
-                {
-                    addrTyp = TYP_BYREF;
-                    op1Addr = gtNewOperNode(GT_ADDR, addrTyp, op1Val); 
-                }
-
-                // addrTyp is the now type of address that we have.
-                //
-                // If we added a GT_ADDR node then we have to assume that we have a TYP_BYREF
-                // if we had a GT_IND then the child node tells us if we have a TYP_BYREF
-                // or an TYP_I_IMPL pointer.
-                //
-
-                // If addrTyp is not a GC type (i.e. TYP_BYREF) we can return
-                //
-                if (addrTyp != TYP_BYREF)
-                    return tree;  // early exit, tree is unmodified
-
-                 // DebugCheckFlags can complain later if we have a GTF_GLOB_REF flag here
-                //
-                if (op1Val->gtOper == GT_FIELD)
-                {
-                    // &clsVar doesn't need GTF_GLOB_REF
-                    op1Addr->gtFlags &= ~GTF_GLOB_REF;
-                }
-
-               // We will transform the tree so that the assignment will not have to 
-                // evaluate op1 and keep it live across the unmanged call in op2
-                // 
-                unsigned   newTempLclNum  = lvaGrabTemp(true DEBUGARG("Force eval op1"));
-                GenTreePtr asgAddr = gtNewTempAssign(newTempLclNum, op1Addr);
-
-                if (splice != nullptr)
-                {
-                    GenTreePtr commaVal = op1;
-                    assert(commaVal->gtOper == GT_COMMA);
-                    while (commaVal->gtOper == GT_COMMA)
-                    {
-                        commaVal->gtType = TYP_VOID;
-                        commaVal = commaVal->gtOp.gtOp2;
-                    }
-                    splice->gtOp.gtOp2 = asgAddr;
-                }
-                else
-                {
-                    op1 = asgAddr;
-                }
-
-                GenTreePtr asgDest = gtNewOperNode(GT_IND, asgTyp, gtNewLclvNode(newTempLclNum, addrTyp));
-
-                op2 = gtNewOperNode(tree->gtOper, tree->gtType, asgDest, op2);
-                op2->gtFlags |= GTF_ASG;
-
-                tree->ChangeOper(GT_COMMA);
-                tree->gtType = TYP_VOID;
-                tree->gtOp.gtOp1 = op1;
-                tree->gtOp.gtOp2 = op2;
-                tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
-                tree->gtFlags |= op1->gtFlags & GTF_ALL_EFFECT;
-                tree->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
-
-/*
-            +------+-------+ 
-    asgAddr |   GT_ASG     |
-            +------+-------+
-                   |
-                 /   \
-               /       \
-     +-----+-----+    +-----+-----+
-     |  LclVar   |    |  GT_ADDR  |  op1Addr
-     |   lclNum  |    | or GT_ADD |
-     +-----+-----+    +-----+-----+
-
-
-                       +------+-------+ 
-                 tree  |   GT_COMMA*  |  (Mutated from GT_ASG)
-                       +------+-------+
-                              |  
-                            /  \ 
-                          /      \ 
-                        /         \
-             +-----+-----+       +-----+-----+ 
-    old op1  | GT_COMMA  |       |  GT_ASG   | gtNewOperNode
- or asgAddr  |  GT_ASG   |       +-----+-----+
-             +-----+-----+             |        
-                                     /   \
-                                   /       \
-                         +-----+-----+    +-----+-----+
-                 asgDest |  GT_IND   |    |  old op2  | 
-                         +-----+-----+    +-----+-----+
-                               |          HasUnmanagedCall
-                         +-----+-----+
-                         |  LclVar   | 
-                         |   lclNum  |
-                         +-----+-----+  
-*/
-            }
-        }
-    }
-#endif
-    return tree;
-}
-
-
 //Checks to see if we're allowed to optimize Type::op_Equality or Type::op_Inequality on this operand.
 //We're allowed to convert to GT_EQ/GT_NE if one of the operands is:
 //  1) The result of Object::GetType
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 08afe0da67..d1b2472333 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -608,8 +608,10 @@ public:
             return gtVNPair.SetConservative(vn);
         }
     }
-
-
+    void                ClearVN()
+    {
+        gtVNPair = ValueNumPair();          // Initializes both elements to "NoVN".
+    }
 
     //---------------------------------------------------------------------
     //  The first set of flags can be used with a large set of nodes, and
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index 691db8071a..aa3e4ab311 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -247,6 +247,7 @@ void initRefTypeNames()
     refTypeNames[RefTypeZeroInit] = "RefTypeZeroInit";
     refTypeNames[RefTypeUpperVectorSaveDef] = "RefTypeUpperVectorSaveDef";
     refTypeNames[RefTypeUpperVectorSaveUse] = "RefTypeUpperVectorSaveUse";
+    refTypeNames[RefTypeKillGCRefs] = "RefTypeKillGCRefs";
 
     shortRefTypeNames[RefTypeInvalid]  = "Invl";
     shortRefTypeNames[RefTypeDef]      = "Def ";
@@ -260,6 +261,7 @@ void initRefTypeNames()
     shortRefTypeNames[RefTypeZeroInit] = "Zero";
     shortRefTypeNames[RefTypeUpperVectorSaveDef] = "UVSv";
     shortRefTypeNames[RefTypeUpperVectorSaveUse] = "UVRs";
+    shortRefTypeNames[RefTypeKillGCRefs] = "KlGC";
 }
 #endif // DEBUG
 
@@ -664,7 +666,7 @@ LinearScan::associateRefPosWithInterval(RefPosition *rp)
     } 
     else
     {
-        assert(rp->refType == RefTypeBB);
+        assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs));
     }
 }
 
@@ -2628,6 +2630,11 @@ LinearScan::buildKillPositionsForNode(GenTree*     tree,
                 }
             }
         }
+
+        if (tree->IsCall() && (tree->gtFlags & GTF_CALL_UNMANAGED) != 0)
+        {
+            RefPosition * pos = newRefPosition((Interval *)nullptr, currentLoc, RefTypeKillGCRefs, tree, (allRegs(TYP_REF) & ~RBM_ARG_REGS));
+        }
         return true;
     }
 
@@ -5130,6 +5137,39 @@ void LinearScan::unassignPhysReg( RegRecord * regRec, RefPosition* spillRefPosit
     }
 }
 
+//------------------------------------------------------------------------
+// spillGCRefs: Spill any GC-type intervals that are currently in registers.
+//
+// Arguments:
+//    killRefPosition - The RefPosition for the kill
+//
+// Return Value:
+//    None.
+//
+void
+LinearScan::spillGCRefs(RefPosition* killRefPosition)
+{
+    // For each physical register that can hold a GC type,
+    // if it is occupied by an interval of a GC type, spill that interval.
+    regMaskTP candidateRegs = killRefPosition->registerAssignment;
+    while (candidateRegs != RBM_NONE)
+    {
+        regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
+        candidateRegs &= ~nextRegBit;
+        regNumber nextReg = genRegNumFromMask(nextRegBit);
+        RegRecord* regRecord = getRegisterRecord(nextReg);
+        Interval* assignedInterval = regRecord->assignedInterval;
+        if (assignedInterval == nullptr ||
+            (assignedInterval->isActive == false) ||
+            !varTypeIsGC(assignedInterval->registerType))
+        {
+            continue;
+        }
+        unassignPhysReg(regRecord, assignedInterval->recentRefPosition);
+    }
+    INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DONE_KILL_GC_REFS, nullptr, REG_NA, nullptr));
+}
+
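The loop above walks the candidate register set with the usual lowest-set-bit idiom (genFindLowestBit / genRegNumFromMask). A stripped-down sketch of that idiom, with generic names rather than the JIT's own types:

    #include <cstdint>
    #include <cstdio>

    typedef uint64_t regMask;   // stand-in for regMaskTP, sketch only

    // Visit every register whose bit is set in 'candidates', lowest bit first.
    inline void ForEachReg (regMask candidates)
    {
        while (candidates != 0)
        {
            regMask nextRegBit = candidates & (0 - candidates); // isolate lowest set bit
            candidates &= ~nextRegBit;                          // remove it from the set

            int regIndex = 0;
            for (regMask bit = nextRegBit; bit > 1; bit >>= 1)
            {
                regIndex++;
            }
            printf("visiting register %d\n", regIndex);
        }
    }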
 //------------------------------------------------------------------------
 // processBlockEndAllocation: Update var locations after 'currentBlock' has been allocated
 //
@@ -5708,7 +5748,7 @@ LinearScan::allocateRegisters()
         } 
         else 
         {
-            assert(refType == RefTypeBB);
+            assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs));
         }
 
         // For the purposes of register resolution, we handle the DummyDefs before
@@ -5764,6 +5804,12 @@ LinearScan::allocateRegisters()
             continue;
         }
 
+        if (refType == RefTypeKillGCRefs)
+        {
+            spillGCRefs(currentRefPosition);
+            continue;
+        }
+
         // If this is a FixedReg, disassociate any inactive constant interval from this register.
         // Otherwise, do nothing.
         if (refType == RefTypeFixedReg)
@@ -7026,6 +7072,10 @@ LinearScan::resolveRegisters()
                 // mismatch.
                 assert(getNextBlock() == nullptr ||
                        !VarSetOps::IsMember(compiler, getNextBlock()->bbLiveIn, currentRefPosition->getInterval()->getVarIndex(compiler)));
+                currentRefPosition->referent->recentRefPosition = currentRefPosition;
+                continue;
+            case RefTypeKillGCRefs:
+                // No action to take at resolution time, and no interval to update recentRefPosition for.
                 continue;
             case RefTypeDummyDef:
             case RefTypeParamDef:
@@ -9279,6 +9329,11 @@ LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, Interval* interval, reg
         }
         break;
 
+    // Done with GC Kills
+    case LSRA_EVENT_DONE_KILL_GC_REFS:
+        printf("DoneKillGC ");
+        break;
+
     // Block boundaries
     case LSRA_EVENT_START_BB:
         assert(currentBlock != nullptr);
@@ -9763,12 +9818,19 @@ LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBl
         }
         printf("  %s%c%c ", shortRefTypeNames[refPosition->refType], lastUseChar, delayChar);
     }
-    else
+    else if (refPosition->isPhysRegRef)
     {
         RegRecord* regRecord = refPosition->getReg();
         printf(regNameFormat, getRegName(regRecord->regNum));
         printf(" %s   ", shortRefTypeNames[refPosition->refType]);
     }
+    else
+    {
+        assert(refPosition->refType == RefTypeKillGCRefs);
+        // There's no interval or reg name associated with this.
+        printf(regNameFormat, "   ");
+        printf(" %s   ", shortRefTypeNames[refPosition->refType]);
+    }
 }
 
 //------------------------------------------------------------------------
@@ -9832,7 +9894,7 @@ LinearScan::verifyFinalAllocation()
                 regRecord->recentRefPosition = currentRefPosition;
                 regNum = regRecord->regNum;
             }
-            else
+            else if (currentRefPosition->isIntervalRef())
             {
                 interval = currentRefPosition->getInterval();
                 interval->recentRefPosition = currentRefPosition;
@@ -10065,6 +10127,25 @@ LinearScan::verifyFinalAllocation()
                 }
             }
             break;
+        case RefTypeKillGCRefs:
+            // No action to take.
+            // However, we will assert that, at resolution time, no registers contain GC refs.
+            {
+                DBEXEC(VERBOSE, printf("           "));
+                regMaskTP candidateRegs = currentRefPosition->registerAssignment;
+                while (candidateRegs != RBM_NONE)
+                {
+                    regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
+                    candidateRegs &= ~nextRegBit;
+                    regNumber nextReg = genRegNumFromMask(nextRegBit);
+                    RegRecord* regRecord = getRegisterRecord(nextReg);
+                    Interval* assignedInterval = regRecord->assignedInterval;
+                    assert (assignedInterval == nullptr ||
+                            !varTypeIsGC(assignedInterval->registerType));
+                }
+            }
+            break;
+
         case RefTypeExpUse:
         case RefTypeDummyDef:
             // Do nothing; these will be handled by the RefTypeBB.
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
index 6b1d3909aa..e57873fb65 100644
--- a/src/jit/lsra.h
+++ b/src/jit/lsra.h
@@ -81,6 +81,7 @@ enum RefType : unsigned char
     RefTypeZeroInit            = (0x30 | RefTypeDef),
     RefTypeUpperVectorSaveDef  = (0x40 | RefTypeDef),
     RefTypeUpperVectorSaveUse  = (0x40 | RefTypeUse),
+    RefTypeKillGCRefs          = 0x80,
     RefTypeBound,
 };
 
@@ -722,6 +723,8 @@ private:
 
     void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);
 
+    void spillGCRefs(RefPosition* killRefPosition);
+
     /*****************************************************************************
      * For Resolution phase
      ****************************************************************************/
@@ -846,6 +849,7 @@ private:
                          LSRA_EVENT_SPILL_EXTENDED_LIFETIME,
                          LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
                          LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL,
+                         LSRA_EVENT_DONE_KILL_GC_REFS,
 
                          // Block boundaries
                          LSRA_EVENT_START_BB,
@@ -1311,7 +1315,7 @@ public:
     unsigned        rpNum;              // The unique RefPosition number, equal to its index in the refPositions list. Only used for debugging dumps.
 #endif // DEBUG
 
-    bool            isIntervalRef() { return !isPhysRegRef; }
+    bool            isIntervalRef() { return (!isPhysRegRef && (referent != nullptr)); }
 
     // isTrueDef indicates that the RefPosition is a non-update def of a non-internal
     // interval
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index c21bb2991d..7f5ca56dc6 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -3748,8 +3748,8 @@ void                Compiler::fgFixupStructReturn(GenTreePtr     call)
 /*****************************************************************************
  *
  *  A little helper used to rearrange nested commutative operations. The
- *  effect is that nested commutative operations are transformed into a
- *  'left-deep' tree, i.e. into something like this:
+ *  effect is that nested associative, commutative operations are transformed
+ *  into a 'left-deep' tree, i.e. into something like this:
  *
  *      (((a op b) op c) op d) op...
  */
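As a rough illustration of that 'left-deep' shape, here is a sketch with a made-up node type (not the JIT's GenTree) that builds (((a op b) op c) op d) ... from a flat operand list; ownership handling is omitted for brevity.

    #include <cstddef>
    #include <vector>

    // Minimal expression node for the sketch.
    struct Node
    {
        char  op;       // '+' for interior nodes, 0 for leaves
        int   value;    // leaf payload
        Node* left;
        Node* right;
    };

    // Build the left-deep chain (((ops[0] + ops[1]) + ops[2]) + ...) that the
    // comment above describes, from a flat list of operands.
    inline Node* MakeLeftDeep (const std::vector<Node*>& operands)
    {
        Node* result = operands[0];
        for (size_t i = 1; i < operands.size(); i++)
        {
            result = new Node{ '+', 0, result, operands[i] };
        }
        return result;
    }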
@@ -3758,51 +3758,55 @@ void                Compiler::fgFixupStructReturn(GenTreePtr     call)
 
 void                Compiler::fgMoveOpsLeft(GenTreePtr tree)
 {
-    GenTreePtr      op1  = tree->gtOp.gtOp1;
-    GenTreePtr      op2  = tree->gtOp.gtOp2;
-    genTreeOps      oper = tree->OperGet();
+    GenTreePtr op1;
+    GenTreePtr op2;
+    genTreeOps oper;
 
-    noway_assert(GenTree::OperIsCommutative(oper));
-    noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR ||
-                 oper == GT_AND || oper == GT_MUL);
-    noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
-    noway_assert(oper == op2->gtOper);
+    do
+    {
+        op1  = tree->gtOp.gtOp1;
+        op2  = tree->gtOp.gtOp2;
+        oper = tree->OperGet();
 
-    // Commutativity doesn't hold if overflow checks are needed
+        noway_assert(GenTree::OperIsCommutative(oper));
+        noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR ||
+                     oper == GT_AND || oper == GT_MUL);
+        noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
+        noway_assert(oper == op2->gtOper);
 
-    if (tree->gtOverflowEx() || op2->gtOverflowEx())
-        return;
+        // Commutativity doesn't hold if overflow checks are needed
 
-    if (gtIsActiveCSE_Candidate(op2))
-    {
-        // If we have marked op2 as a CSE candidate,
-        // we can't perform a commutative reordering
-        // because any value numbers that we computed for op2
-        // will be incorrect after performing a commutative reordering
-        //
-        return;
-    }
+        if (tree->gtOverflowEx() || op2->gtOverflowEx())
+            return;
 
-    if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
-        return;
+        if (gtIsActiveCSE_Candidate(op2))
+        {
+            // If we have marked op2 as a CSE candidate,
+            // we can't perform a commutative reordering
+            // because any value numbers that we computed for op2
+            // will be incorrect after performing a commutative reordering
+            //
+            return;
+        }
 
-    // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
-    if (    ((oper == GT_ADD) || (oper == GT_MUL))
-         && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)               )
-    {
-        return;
-    }
+        if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
+            return;
 
-    if ( (tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN )
-    {
-        // We could deal with this, but we were always broken and just hit the assert
-        // below regarding flags, which means it's not frequent, so will just bail out.
-        // See #195514
-        return;
-    }
+        // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
+        if (    ((oper == GT_ADD) || (oper == GT_MUL))
+             && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)               )
+        {
+            return;
+        }
+
+        if ( (tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN )
+        {
+            // We could deal with this, but we were always broken and just hit the assert
+            // below regarding flags, which means it's not frequent, so will just bail out.
+            // See #195514
+            return;
+        }
 
-    do
-    {
         noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
 
         GenTreePtr      ad1 = op2->gtOp.gtOp1;
@@ -4191,6 +4195,10 @@ GenTreePtr          Compiler::fgMorphArrayIndex(GenTreePtr tree)
     // Store information about it.
     GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int) elemOffs, elemStructType));
 
+    // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
+
+    GenTreePtr indTree = tree;
+
     // Did we create a bndsChk tree?
     if  (bndsChk)
     {
@@ -4215,13 +4223,30 @@ GenTreePtr          Compiler::fgMorphArrayIndex(GenTreePtr tree)
         tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
     }
 
+    // Currently we morph the tree to perform some folding operations prior 
+    // to attaching fieldSeq info and labeling constant array index contributions
+    // 
     fgMorphTree(tree);
 
-    if (fgIsCommaThrow(tree))
-        return tree;
-
+    // Ideally we just want to proceed to attaching fieldSeq info and labeling the 
+    // constant array index contributions, but the morphing operation may have changed 
+    // the 'tree' into something that now unconditionally throws an exception.
+    //
+    // In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
+    // or it could be left unchanged.  If it is unchanged we should not return;
+    // instead we should proceed to attaching the fieldSeq info, etc...
+    // 
     GenTreePtr arrElem = tree->gtEffectiveVal();
 
+    if (fgIsCommaThrow(tree))
+    {
+        if ((arrElem != indTree) ||          // A new tree node may have been created
+            (indTree->OperGet() != GT_IND))  // The GT_IND may have been changed to a GT_CNS_INT
+        {
+            return tree;     // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
+        }
+    }
+
     assert(!fgGlobalMorph || (arrElem->gtFlags & GTF_MORPHED));
 
     addr = arrElem->gtOp.gtOp1;
@@ -7668,10 +7693,6 @@ GenTreePtr          Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* ma
 #if !FEATURE_STACK_FP_X87
         tree = fgMorphForRegisterFP(tree);
 #endif
-        if (tree->OperKind() & GTK_ASGOP)
-        {
-            tree = gtCheckReorderAssignmentForUnmanagedCall(tree);
-        }
     }
 
     genTreeOps      oper    = tree->OperGet();
@@ -10920,28 +10941,31 @@ ASG_OP:
 
     case GT_XOR:
 
-        /* "x ^ -1" is "~x" */
-
-        if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == -1))
-        {
-            tree->ChangeOper(GT_NOT);
-            tree->gtOp2 = NULL;
-            DEBUG_DESTROY_NODE(op2);
-        }
-        else if ((op2->gtOper == GT_CNS_LNG) && (op2->gtIntConCommon.LngValue() == -1))
-        {
-            tree->ChangeOper(GT_NOT);
-            tree->gtOp2 = NULL;
-            DEBUG_DESTROY_NODE(op2);
-        }
-        else if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == 1) &&
-                 op1->OperIsCompare())
+        if (!optValnumCSE_phase)
         {
-            /* "binaryVal ^ 1" is "!binaryVal" */
-            gtReverseCond(op1);
-            DEBUG_DESTROY_NODE(op2);
-            DEBUG_DESTROY_NODE(tree);
-            return op1;
+            /* "x ^ -1" is "~x" */
+            
+            if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == -1))
+            {
+                tree->ChangeOper(GT_NOT);
+                tree->gtOp2 = NULL;
+                DEBUG_DESTROY_NODE(op2);
+            }
+            else if ((op2->gtOper == GT_CNS_LNG) && (op2->gtIntConCommon.LngValue() == -1))
+            {
+                tree->ChangeOper(GT_NOT);
+                tree->gtOp2 = NULL;
+                DEBUG_DESTROY_NODE(op2);
+            }
+            else if ((op2->gtOper == GT_CNS_INT) && (op2->gtIntConCommon.IconValue() == 1) &&
+                     op1->OperIsCompare())
+            {
+                /* "binaryVal ^ 1" is "!binaryVal" */
+                gtReverseCond(op1);
+                DEBUG_DESTROY_NODE(op2);
+                DEBUG_DESTROY_NODE(tree);
+                return op1;
+            }
         }
 
         break;
@@ -14509,7 +14533,12 @@ Compiler::fgWalkResult      Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTr
 
     case GT_ADD:
         assert(axc != AXC_Addr);
-        if (axc == AXC_Ind)
+        // See below about treating pointer operations as wider indirection.
+        if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
+        {
+            axcStack->Push(AXC_IndWide);
+        }
+        else if (axc == AXC_Ind)
         {
             // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
             // If it's an add of a constant and an address, and the constant represents a field,
@@ -14522,16 +14551,55 @@ Compiler::fgWalkResult      Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTr
         }
         return WALK_CONTINUE;
 
+    // !!! Treat Pointer Operations as Wider Indirection
+    //
+    // If we are performing pointer operations, make sure we treat that as equivalent to a wider
+    // indirection. This is because the pointers could be pointing to the address of struct fields
+    // and could be used to perform operations on the whole struct or passed to another method.
+    // 
+    // When visiting a node in this pre-order walk, we do not know if we would in the future
+    // encounter a GT_ADDR of a GT_FIELD below.
+    //
+    // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
+    // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
+    // wider indirection context down the expr tree.
+    //
+    // For example, in unsafe code:
+    //
+    //   IL_000e  12 00             ldloca.s     0x0
+    //   IL_0010  7c 02 00 00 04    ldflda       0x4000002
+    //   IL_0015  12 00             ldloca.s     0x0
+    //   IL_0017  7c 01 00 00 04    ldflda       0x4000001
+    //   IL_001c  59                sub
+    //
+    // When visiting the GT_SUB node, if the types of either of the GT_SUB's operand are BYREF, then
+    // consider GT_SUB to be equivalent of an AXC_IndWide.
+    //
+    // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
+    // them as AXC_IndWide.
+    //
+    
+    // BINOP
+    case GT_SUB:
+    case GT_MUL:
+    case GT_DIV:
+    case GT_UDIV:
+    case GT_OR:
+    case GT_XOR:
+    case GT_AND:
+    case GT_LSH:
+    case GT_RSH:
+    case GT_RSZ:
     case GT_EQ:
     case GT_NE:
     case GT_LT:
     case GT_LE:
-    case GT_GE:
     case GT_GT:
+    case GT_GE:
+    // UNOP
     case GT_CAST:
-        if (tree->gtOp.gtOp1->gtType == TYP_BYREF)
+        if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) || (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
         {
-            // if code is trying to convert a byref or compare one, pessimize.  
             axcStack->Push(AXC_IndWide);
             return WALK_CONTINUE;
         }
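
A minimal unsafe C# sketch (hypothetical struct and method names) of the kind of source that produces the ldloca/ldflda/sub IL quoted in the comment above; both operands of the subtraction are addresses of fields of the same local struct, which is why the walk must widen the indirection context:

    struct Pair
    {
        public int A;   // roughly field token 0x4000001 in the IL excerpt
        public int B;   // roughly field token 0x4000002 in the IL excerpt
    }

    static class ByrefMath
    {
        public static unsafe long FieldDistance()
        {
            Pair p = new Pair();
            // Compiles to roughly: ldloca.s 0 / ldflda B / ldloca.s 0 / ldflda A / sub
            return (byte*)&p.B - (byte*)&p.A;
        }
    }
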
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
index 753b0aaf6e..b51e772c62 100644
--- a/src/jit/optimizer.cpp
+++ b/src/jit/optimizer.cpp
@@ -1900,7 +1900,10 @@ NO_LOOP: ;
             continue;
 
         // Otherwise...
-        mod = mod || optCanonicalizeLoopNest(loopInd);
+        if (optCanonicalizeLoopNest(loopInd))
+        {
+            mod = true;
+        }
     }
     if (mod)
     {
@@ -1954,7 +1957,6 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
                 {
                     blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
                     redirected = true;
-                
                 }
             }
             // If any redirections happend, invalidate the switch table map for the switch.
@@ -2002,21 +2004,32 @@ void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
     }
 }
 
+// Canonicalize the loop nest rooted at parent loop 'loopInd'.
+// Returns 'true' if the flow graph is modified.
 bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
 {
-    bool res = false;
+    bool modified = false;
+
     // Is the top of the current loop not in any nested loop?
     if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
     {
-        res = res || optCanonicalizeLoop(loopInd);
+        if (optCanonicalizeLoop(loopInd))
+        {
+            modified = true;
+        }
     }
+
     for (unsigned char child = optLoopTable[loopInd].lpChild; 
          child != BasicBlock::NOT_IN_LOOP; 
          child = optLoopTable[child].lpSibling)
     {
-        res = res || optCanonicalizeLoopNest(child);
+        if (optCanonicalizeLoopNest(child))
+        {
+            modified = true;
+        }
     }
-    return res;
+
+    return modified;
 }
 
 bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
@@ -2027,6 +2040,9 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
     if (t->bbNatLoopNum == loopInd)
         return false;
 
+    JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to canonicalize\n",
+            loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
+
     // Otherwise, the top of this loop is also part of a nested loop.
     //
     // Insert a new unique top for this loop. We must be careful to put this new
@@ -2064,6 +2080,29 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
     // ...
     // BB12 BBJ_ALWAYS => BB30
     //
+    // Another possibility is that the "first" block of the loop nest can be the first block
+    // of a "try" region that also has other predecessors than those in the loop, or even in
+    // the "try" region (since blocks can target the first block of a "try" region). For example:
+    //
+    // BB08 try {
+    // ...
+    // BB10 BBJ_ALWAYS => BB08
+    // ...
+    // BB12 BBJ_ALWAYS => BB08
+    // BB13 }
+    // ...
+    // BB20 BBJ_ALWAYS => BB08
+    // ...
+    // BB25 BBJ_ALWAYS => BB08
+    //
+    // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
+    // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
+    // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
+    // the try region, immediately before BB08. The bottom of the BB08..BB10 loop will target the
+    // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
+    // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
+    // situation of branching to a non-first block of a 'try' region.
+    //
     // We can also have a loop nest where the "first" block is outside of a "try" region
     // and the back edges are inside a "try" region, for example:
     //
@@ -2106,6 +2145,35 @@ bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
     blockMap->Set(t, newT);
     optRedirectBlock(b, blockMap);
 
+    // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
+    // to do so. However, there maybe be other predecessors from outside the loop nest that need to be updated
+    // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
+    // they might have been prevented from participating in the loop nest due to different EH nesting, or some
+    // other reason.
+    //
+    // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
+    // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
+    // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
+    // edge of the blockMap, so nothing will happen.
+    for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
+    {
+        BasicBlock* topPredBlock = topPred->flBlock;
+
+        // Skip if topPredBlock is in the loop.
+        // Note that this uses block number to detect membership in the loop. We are adding blocks during canonicalization,
+        // and those block numbers will be new, and larger than previous blocks. However, we work outside-in, so we
+        // shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
+        if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
+        {
+            JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not redirecting its bottom edge\n",
+                    topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
+            continue;
+        }
+
+        JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum, newT->bbNum);
+        optRedirectBlock(topPredBlock, blockMap);
+    }
+
     assert(newT->bbNext == f);
     if (f != t)
     {
@@ -4748,6 +4816,8 @@ bool                Compiler::optNarrowTree(GenTreePtr     tree,
                 if  (doit)
                 {
                     tree->gtType = genActualType(dstt);
+                    tree->ClearVN();
+
                     optNarrowTree(op2, srct, dstt, true);
                     // We may also need to cast away the upper bits of op1
                     if (srcSize == 8) 
@@ -4781,8 +4851,10 @@ COMMON_BINOP:
             noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
             noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
 
-            if  (!optNarrowTree(op1, srct, dstt, doit) ||
-                 !optNarrowTree(op2, srct, dstt, doit))
+            if (gtIsActiveCSE_Candidate(op1)          ||
+                gtIsActiveCSE_Candidate(op2)          ||
+                !optNarrowTree(op1, srct, dstt, doit) ||
+                !optNarrowTree(op2, srct, dstt, doit)   )
             {
                 noway_assert(doit == false);
                 return  false;
@@ -4796,6 +4868,7 @@ COMMON_BINOP:
                     tree->gtFlags &= ~GTF_MUL_64RSLT;
 
                 tree->gtType = genActualType(dstt);
+                tree->ClearVN();
             }
 
             return true;
@@ -4808,6 +4881,7 @@ NARROW_IND:
             if  (doit && (dstSize <= genTypeSize(tree->gtType)))
             {
                 tree->gtType = genSignedType(dstt);
+                tree->ClearVN();
 
                 /* Make sure we don't mess up the variable type */
                 if  ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
@@ -4870,6 +4944,7 @@ NARROW_IND:
                             // The result type of a GT_CAST is never a small type.
                             // Use genActualType to widen dstt when it is a small types.
                             tree->gtType = genActualType(dstt);
+                            tree->ClearVN();
                         }
                     }
 
@@ -4879,12 +4954,16 @@ NARROW_IND:
             return  false;
 
         case GT_COMMA:
-            if (optNarrowTree(op2, srct, dstt, doit)) 
+            if (!gtIsActiveCSE_Candidate(op2)  &&
+                optNarrowTree(op2, srct, dstt, doit))
             {               
                 /* Simply change the type of the tree */
 
                 if  (doit)
+                {
                     tree->gtType = genActualType(dstt);
+                    tree->ClearVN();
+                }
                 return true;
             }
             return false;
diff --git a/src/jit/rangecheck.cpp b/src/jit/rangecheck.cpp
index f6b452686f..70e4e96add 100644
--- a/src/jit/rangecheck.cpp
+++ b/src/jit/rangecheck.cpp
@@ -343,7 +343,11 @@ bool RangeCheck::IsMonotonicallyIncreasing(GenTreePtr expr, SearchPath* path)
     // If the rhs expr is constant, then it is not part of the dependency
     // loop which has to increase monotonically.
     ValueNum vn = expr->gtVNPair.GetConservative();
-    if (m_pCompiler->vnStore->IsVNConstant(vn))
+    if (path->GetCount() > MAX_SEARCH_DEPTH)
+    {
+        return false;
+    }
+    else if (m_pCompiler->vnStore->IsVNConstant(vn))
     {
         return true;
     }
@@ -885,10 +889,16 @@ bool RangeCheck::AddOverflows(Limit& limit1, Limit& limit2)
 // Does the bin operation overflow.
 bool RangeCheck::DoesBinOpOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr op1, GenTreePtr op2, SearchPath* path)
 {
-    if (DoesOverflow(block, stmt, op1, path) || DoesOverflow(block, stmt, op2, path))
+    if (!path->Lookup(op1) && DoesOverflow(block, stmt, op1, path))
     {
         return true;
     }
+
+    if (!path->Lookup(op2) && DoesOverflow(block, stmt, op2, path))
+    {
+        return true;
+    }
+
     // Get the cached ranges of op1
     Range* op1Range = nullptr;
     if (!GetRangeMap()->Lookup(op1, &op1Range))
@@ -983,15 +993,17 @@ bool RangeCheck::ComputeDoesOverflow(BasicBlock* block, GenTreePtr stmt, GenTree
     JITDUMP("Does overflow %p?\n", dspPtr(expr));
     path->Set(expr, block);
 
-    noway_assert(path->GetCount() <= MAX_SEARCH_DEPTH);
-
     bool overflows = true;
 
     // Remove hashtable entry for expr when we exit the present scope.
     Range range = Limit(Limit::keUndef);
     ValueNum vn = expr->gtVNPair.GetConservative();
+    if (path->GetCount() > MAX_SEARCH_DEPTH)
+    {
+        overflows = true;
+    }
     // If the definition chain resolves to a constant, it doesn't overflow.
-    if (m_pCompiler->vnStore->IsVNConstant(vn))
+    else if (m_pCompiler->vnStore->IsVNConstant(vn))
     {
         overflows = false;
     }
@@ -1033,7 +1045,7 @@ struct Node
 // eg.: merge((0, dep), (dep, dep)) = (0, dep)
 Range RangeCheck::ComputeRange(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent))
 {
-    bool newlyAdded = path->Set(expr, block);
+    bool newlyAdded = !path->Set(expr, block);
     Range range = Limit(Limit::keUndef);
 
     ValueNum vn = expr->gtVNPair.GetConservative();
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index 8d6a21edf5..59fed64056 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -801,8 +801,9 @@ CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
     var_types targetType = simdNode->TypeGet();
     InstructionSet iset = compiler->getSIMDInstructionSet();
 
-    regNumber op1Reg = genConsumeReg(op1);
-    regNumber op2Reg = genConsumeReg(op2);
+    genConsumeOperands(simdNode);
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = op2->gtRegNum;
     regNumber otherReg = op2Reg;
 
     // Vector<Int>.Mul:
@@ -990,8 +991,9 @@ CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
     var_types targetType = simdNode->TypeGet();
     InstructionSet iset = compiler->getSIMDInstructionSet();
 
-    regNumber op1Reg = genConsumeReg(op1);
-    regNumber op2Reg = genConsumeReg(op2);
+    genConsumeOperands(simdNode);
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = op2->gtRegNum;
     regNumber otherReg = op2Reg;
 
     switch(simdNode->gtSIMDIntrinsicID)
@@ -1211,8 +1213,9 @@ CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
     assert(targetType == baseType);
     assert(varTypeIsFloating(baseType));
 
-    regNumber op1Reg = genConsumeReg(op1);
-    regNumber op2Reg = genConsumeReg(op2);
+    genConsumeOperands(simdNode);
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = op2->gtRegNum;
 
     regNumber tmpReg = REG_NA;
     // For SSE, or AVX with 32-byte vectors, we need an additional Xmm register as scratch.
@@ -1390,7 +1393,8 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
     // GetItem has 2 operands:
     // - the source of SIMD type (op1)
     // - the index of the value to be returned.
-    regNumber srcReg = genConsumeReg(op1);
+    genConsumeOperands(simdNode);
+    regNumber srcReg = op1->gtRegNum;
     
     // SSE2 doesn't have an instruction to implement this intrinsic if the index is not a constant.
     // For the non-constant case, we will use the SIMD temp location to store the vector, and
@@ -1403,7 +1407,7 @@ CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
         noway_assert(simdInitTempVarNum != BAD_VAR_NUM);
         bool isEBPbased;
         unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased);
-        regNumber indexReg = genConsumeReg(op2);
+        regNumber indexReg = op2->gtRegNum;
 
         // Store the vector to the temp location.
         getEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), 
@@ -1579,8 +1583,9 @@ CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
     assert(op2->TypeGet() == baseType);
     assert(simdNode->gtSIMDSize >= ((index + 1) * genTypeSize(baseType)));
 
-    regNumber op1Reg = genConsumeReg(op1);
-    regNumber op2Reg = genConsumeReg(op2);
+    genConsumeOperands(simdNode);
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op2Reg = op2->gtRegNum;
 
     // TODO-CQ: For AVX we don't need to do a copy because it supports 3 operands plus immediate.
     if (targetReg != op1Reg)
@@ -1694,17 +1699,7 @@ CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
     assert(genCountBits(treeNode->gtRsvdRegs) == 1);
     regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
 
-    bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0);
-    if (!reverseOps)
-    {
-        genConsumeReg(addr);
-        genConsumeReg(data);
-    }
-    else
-    {
-        genConsumeReg(data);
-        genConsumeReg(addr);        
-    }
+    genConsumeOperands(treeNode->AsOp());
 
     // 8-byte write
     getEmitter()->emitIns_AR_R(ins_Store(TYP_DOUBLE), EA_8BYTE, data->gtRegNum, addr->gtRegNum, 0);
diff --git a/src/mscorlib/src/System/Diagnostics/Eventing/FrameworkEventSource.cs b/src/mscorlib/src/System/Diagnostics/Eventing/FrameworkEventSource.cs
index a3e1348c79..1851dc3666 100644
--- a/src/mscorlib/src/System/Diagnostics/Eventing/FrameworkEventSource.cs
+++ b/src/mscorlib/src/System/Diagnostics/Eventing/FrameworkEventSource.cs
@@ -500,15 +500,13 @@ namespace System.Diagnostics.Tracing {
         [Event(140, Level = EventLevel.Informational, Keywords = Keywords.NetClient, ActivityOptions=EventActivityOptions.Disable,
          Task = Tasks.GetResponse, Opcode = EventOpcode.Start, Version = 1)]
         private void GetResponseStart(long id, string uri, bool success, bool synchronous) {
-            if (IsEnabled())
-                WriteEvent(140, id, uri, success, synchronous);
+            WriteEvent(140, id, uri, success, synchronous);
         }
 
         [Event(141, Level = EventLevel.Informational, Keywords = Keywords.NetClient, ActivityOptions=EventActivityOptions.Disable, 
          Task = Tasks.GetResponse, Opcode = EventOpcode.Stop, Version = 1)]
         private void GetResponseStop(long id, bool success, bool synchronous, int statusCode) {
-            if (IsEnabled())
-                WriteEvent(141, id, success, synchronous, statusCode);
+            WriteEvent(141, id, success, synchronous, statusCode);
         }
 
         // In the desktop runtime they don't use Tasks for the point at which the response happens, which means that the
@@ -516,35 +514,37 @@ namespace System.Diagnostics.Tracing {
         [Event(142, Level = EventLevel.Informational, Keywords = Keywords.NetClient, ActivityOptions=EventActivityOptions.Disable,
          Task = Tasks.GetRequestStream, Opcode = EventOpcode.Start, Version = 1)]
         private void GetRequestStreamStart(long id, string uri, bool success, bool synchronous) {
-            if (IsEnabled())
-                WriteEvent(142, id, uri, success, synchronous);
+            WriteEvent(142, id, uri, success, synchronous);
         }
 
         [Event(143, Level = EventLevel.Informational, Keywords = Keywords.NetClient, ActivityOptions=EventActivityOptions.Disable,
          Task = Tasks.GetRequestStream, Opcode = EventOpcode.Stop, Version = 1)]
         private void GetRequestStreamStop(long id, bool success, bool synchronous) {
-            if (IsEnabled())
-                WriteEvent(143, id, success, synchronous);
+            WriteEvent(143, id, success, synchronous);
         }
 
         [NonEvent, System.Security.SecuritySafeCritical]
         public unsafe void BeginGetResponse(object id, string uri, bool success, bool synchronous) {
-            GetResponseStart((long)*((void**)JitHelpers.UnsafeCastToStackPointer(ref id)), uri, success, synchronous);
+            if (IsEnabled())
+                GetResponseStart(IdForObject(id), uri, success, synchronous);
         }
             
         [NonEvent, System.Security.SecuritySafeCritical]
         public unsafe void EndGetResponse(object id, bool success, bool synchronous, int statusCode) {
-            GetResponseStop((long)*((void**)JitHelpers.UnsafeCastToStackPointer(ref id)), success, synchronous, statusCode);
+            if (IsEnabled())
+                GetResponseStop(IdForObject(id), success, synchronous, statusCode);
         }
 
         [NonEvent, System.Security.SecuritySafeCritical]
         public unsafe void BeginGetRequestStream(object id, string uri, bool success, bool synchronous) {
-            GetRequestStreamStart((long)*((void**)JitHelpers.UnsafeCastToStackPointer(ref id)), uri, success, synchronous);
+            if (IsEnabled())
+                GetRequestStreamStart(IdForObject(id), uri, success, synchronous);
         }
 
         [NonEvent, System.Security.SecuritySafeCritical]
         public unsafe void EndGetRequestStream(object id, bool success, bool synchronous) {
-            GetRequestStreamStop((long)*((void**)JitHelpers.UnsafeCastToStackPointer(ref id)), success, synchronous);
+            if (IsEnabled())
+                GetRequestStreamStop(IdForObject(id), success, synchronous);
         }
 
         // id -   represents a correlation ID that allows correlation of two activities, one stamped by 
@@ -607,6 +607,13 @@ namespace System.Diagnostics.Tracing {
             ThreadTransferReceive((long) *((void**) JitHelpers.UnsafeCastToStackPointer(ref id)), kind, info);
         }
 
+        // Return a stable ID for an object.  We use the hash code, which is not truly unique but is
+        // close enough for now at least.  We add 0x7FFFFFFF00000000 to it to make it distinguishable
+        // from the style of ID that simply casts the object reference to a long (since old versions of the
+        // runtime will emit IDs of that form).
+        private static long IdForObject(object obj) {
+            return obj.GetHashCode() + 0x7FFFFFFF00000000;
+        }
     }
 }
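
A small standalone sketch of the arithmetic behind IdForObject above: GetHashCode() yields a 32-bit value, so adding 0x7FFFFFFF00000000 places every ID in a narrow band near 2^63 that cannot collide with the old pointer-cast style of ID:

    using System;

    static class IdRange
    {
        static long IdForObject(object obj)
        {
            return obj.GetHashCode() + 0x7FFFFFFF00000000;
        }

        static void Main()
        {
            // GetHashCode() is in [int.MinValue, int.MaxValue], so the ID always falls in
            // [0x7FFFFFFE80000000, 0x7FFFFFFF7FFFFFFF], far away from plausible object addresses.
            Console.WriteLine("{0:X16}", IdForObject(new object()));
        }
    }
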
 
diff --git a/src/mscorlib/src/System/Environment.cs b/src/mscorlib/src/System/Environment.cs
index b517b58b66..163fd19eda 100644
--- a/src/mscorlib/src/System/Environment.cs
+++ b/src/mscorlib/src/System/Environment.cs
@@ -1055,7 +1055,12 @@ namespace System {
         ==============================================================================*/
         public static Version Version {
             get {
-                return new Version(ThisAssembly.InformationalVersion);
+
+                // Previously this represented the file version of mscorlib.dll.  Many other libraries in the framework and outside took dependencies on the first three parts of this version
+                // remaining constant throughout 4.x.  From 4.0 to 4.5.2 this was fine since the file version only incremented the last part.  Starting with 4.6 we switched to a file versioning
+                // scheme that matches the product version.  In order to preserve compatibility with existing libraries, this value needs to be hard-coded.
+                
+                return new Version(4,0,30319,42000);
             }
         }
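
A short sketch of the external dependency described in the comment above: code that keys off the first three components of Environment.Version keeps working because those components now stay pinned at 4.0.30319, with only the last component left to vary:

    using System;

    static class VersionProbe
    {
        static void Main()
        {
            Version v = Environment.Version;
            bool looksLikeClr4 = (v.Major == 4) && (v.Minor == 0) && (v.Build == 30319);
            Console.WriteLine(looksLikeClr4);   // true on 4.x runtimes
        }
    }
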
 
diff --git a/src/mscorlib/src/System/Globalization/CultureInfo.cs b/src/mscorlib/src/System/Globalization/CultureInfo.cs
index 2cdad273d2..3bc804650e 100644
--- a/src/mscorlib/src/System/Globalization/CultureInfo.cs
+++ b/src/mscorlib/src/System/Globalization/CultureInfo.cs
@@ -287,6 +287,28 @@ namespace System.Globalization {
  
             return toReturn;
         }
+
+        [SecuritySafeCritical]
+        internal static bool SetCultureInfoForUserPreferredLanguageInAppX(CultureInfo ci)
+        {
+            // If running within a compilation process (mscorsvw.exe, for example), it is illegal to
+            // load any non-mscorlib assembly for execution. Since WindowsRuntimeResourceManager lives
+            // in System.Runtime.WindowsRuntime, caller will need to fall back to default Win32 value,
+            // which should be fine because we should only ever need to access FX resources during NGEN.
+            // FX resources are always loaded from satellite assemblies - even in AppX processes (see the
+            // comments in code:System.Resources.ResourceManager.SetAppXConfiguration for more details).
+            if (AppDomain.IsAppXNGen)
+            {
+                return false;
+            }
+
+            if (s_WindowsRuntimeResourceManager == null)
+            {
+                s_WindowsRuntimeResourceManager = ResourceManager.GetWinRTResourceManager();
+            }
+
+            return s_WindowsRuntimeResourceManager.SetGlobalResourceContextDefaultCulture(ci);
+        }
 #endif
 
         ////////////////////////////////////////////////////////////////////////
@@ -694,7 +716,19 @@ namespace System.Globalization {
             }
 
             set {
-                Thread.CurrentThread.CurrentCulture = value;
+#if FEATURE_APPX
+                    if (value == null) {
+                        throw new ArgumentNullException("value");
+                    }                    
+
+                    if (AppDomain.IsAppXModel()) {
+                        if (SetCultureInfoForUserPreferredLanguageInAppX(value)) {
+                            // successfully set the culture, otherwise fallback to legacy path
+                            return; 
+                        }
+                    }
+#endif
+                    Thread.CurrentThread.CurrentCulture = value;
             }
         }
 
@@ -780,7 +814,19 @@ namespace System.Globalization {
             }
 
             set {
-                Thread.CurrentThread.CurrentUICulture = value;
+#if FEATURE_APPX
+                    if (value == null) {
+                        throw new ArgumentNullException("value");
+                    }                    
+
+                    if (AppDomain.IsAppXModel()) {
+                        if (SetCultureInfoForUserPreferredLanguageInAppX(value)) {
+                            // successfully set the culture, otherwise fallback to legacy path
+                            return; 
+                        }
+                    }
+#endif
+                    Thread.CurrentThread.CurrentUICulture = value;
             }
         }
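
A condensed sketch, with hypothetical helper names, of the setter flow added above: on an AppX process the culture is first pushed into the WinRT global resource context, and only if that fails does the assignment fall back to the classic per-thread path:

    using System;
    using System.Globalization;
    using System.Threading;

    static class CultureSetterSketch
    {
        // Stand-ins for AppDomain.IsAppXModel() and SetCultureInfoForUserPreferredLanguageInAppX(ci).
        static bool IsAppXModel() { return false; }
        static bool TrySetAppXCulture(CultureInfo ci) { return false; }

        static void SetCurrentUICulture(CultureInfo value)
        {
            if (value == null)
                throw new ArgumentNullException("value");

            if (IsAppXModel() && TrySetAppXCulture(value))
                return;                                      // WinRT resource context updated

            Thread.CurrentThread.CurrentUICulture = value;   // legacy path
        }
    }
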
 
diff --git a/src/mscorlib/src/System/Globalization/DateTimeFormat.cs b/src/mscorlib/src/System/Globalization/DateTimeFormat.cs
index 58f35bd2ef..82bc4b6f79 100644
--- a/src/mscorlib/src/System/Globalization/DateTimeFormat.cs
+++ b/src/mscorlib/src/System/Globalization/DateTimeFormat.cs
@@ -929,6 +929,7 @@ namespace System {
                         case Calendar.CAL_HEBREW:
                         case Calendar.CAL_JULIAN:
                         case Calendar.CAL_UMALQURA:
+                        case Calendar.CAL_PERSIAN:
                             timeOnlySpecialCase = true;
                             dtfi = DateTimeFormatInfo.InvariantInfo;
                             break;                        
diff --git a/src/mscorlib/src/System/Globalization/DateTimeParse.cs b/src/mscorlib/src/System/Globalization/DateTimeParse.cs
index 1af84a44f6..bbece12b4f 100644
--- a/src/mscorlib/src/System/Globalization/DateTimeParse.cs
+++ b/src/mscorlib/src/System/Globalization/DateTimeParse.cs
@@ -3683,14 +3683,21 @@ new DS[] { DS.ERROR, DS.TX_NNN,  DS.TX_NNN,  DS.TX_NNN,  DS.ERROR,   DS.ERROR,
                     // Otherwise it is unspecified and we consume no characters
                     break;
                 case ':':
-                    if (!str.Match(dtfi.TimeSeparator)) {
+                    // We match the ':' in the time pattern against the character in the time string either when both are ':' or when the time separator matches the characters in the time string.
+                    // We have to exclude the case where the time separator is more than one character long and starts with ':', such as "::".
+                    if (((dtfi.TimeSeparator.Length > 1 && dtfi.TimeSeparator[0] == ':') || !str.Match(':')) && 
+                        !str.Match(dtfi.TimeSeparator)) {
                         // A time separator is expected.
                         result.SetFailure(ParseFailureKind.Format, "Format_BadDateTime", null);
                         return false;
                     }
                     break;
                 case '/':
-                    if (!str.Match(dtfi.DateSeparator)) {
+                    // We match the '/' in the date pattern against the character in the date string either when both are '/' or when the date separator matches the characters in the date string.
+                    // We have to exclude the case where the date separator is more than one character long and starts with '/', such as "//".
+                    if (((dtfi.DateSeparator.Length > 1 && dtfi.DateSeparator[0] == '/') || !str.Match('/')) && 
+                        !str.Match(dtfi.DateSeparator))
+                    {
                         // A date separator is expected.
                         result.SetFailure(ParseFailureKind.Format, "Format_BadDateTime", null);
                         return false;
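
A sketch of the behavior the two checks above are after: a literal ':' or '/' in the input should satisfy the corresponding pattern character even when the culture defines a different separator, while a multi-character separator that itself starts with ':' or '/' (for example "::" or "//") must still be matched in full. Assuming a cloned invariant DateTimeFormatInfo:

    using System;
    using System.Globalization;

    static class SeparatorSketch
    {
        static void Main()
        {
            DateTimeFormatInfo dtfi = (DateTimeFormatInfo)CultureInfo.InvariantCulture.DateTimeFormat.Clone();
            dtfi.TimeSeparator = ".";   // the culture prefers '.', but the input uses ':'

            // With the change above, the ':' in the input matches the literal ':' in the pattern
            // instead of failing because it is not the culture's time separator.
            DateTime t = DateTime.ParseExact("10:30", "HH:mm", dtfi);
            Console.WriteLine(t.TimeOfDay);
        }
    }
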
diff --git a/src/mscorlib/src/System/Resources/ResourceManager.cs b/src/mscorlib/src/System/Resources/ResourceManager.cs
index f55b55482a..132d854dd3 100644
--- a/src/mscorlib/src/System/Resources/ResourceManager.cs
+++ b/src/mscorlib/src/System/Resources/ResourceManager.cs
@@ -38,7 +38,10 @@ namespace System.Resources {
     //
     // This is implemented in System.Runtime.WindowsRuntime as function System.Resources.WindowsRuntimeResourceManager,
     // allowing us to ask for a WinRT-specific ResourceManager.
-    // Ideally this would be an interface, or at least an abstract class - but neither seems to play nice with FriendAccessAllowed.
+    // It is important to keep WindowsRuntimeResourceManagerBase as a regular class with virtual methods and default implementations.
+    // Defining WindowsRuntimeResourceManagerBase as an abstract class or an interface would cause issues when adding more methods to it,
+    // because it would create a dependency between mscorlib and System.Runtime.WindowsRuntime that would require always shipping both DLLs together.
+    // Also, an interface or abstract class would not play nicely with FriendAccessAllowed.
     //
     [FriendAccessAllowed]
     [SecurityCritical]
@@ -54,6 +57,9 @@ namespace System.Resources {
             [SecurityCritical]
             get { return null; } 
         }
+        
+        [SecurityCritical]
+        public virtual bool SetGlobalResourceContextDefaultCulture(CultureInfo ci) { return false; }
     }
 
     [FriendAccessAllowed]
diff --git a/src/mscorlib/src/System/Runtime/CompilerServices/TaskAwaiter.cs b/src/mscorlib/src/System/Runtime/CompilerServices/TaskAwaiter.cs
index 18c6335d1e..7b58dbe7ff 100644
--- a/src/mscorlib/src/System/Runtime/CompilerServices/TaskAwaiter.cs
+++ b/src/mscorlib/src/System/Runtime/CompilerServices/TaskAwaiter.cs
@@ -1,4 +1,4 @@
-// Copyright (c) Microsoft. All rights reserved.
+// Copyright (c) Microsoft. All rights reserved.
 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
 
 // =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
diff --git a/src/mscorlib/src/System/Security/Permissions/FileIOPermission.cs b/src/mscorlib/src/System/Security/Permissions/FileIOPermission.cs
index 09d34779f4..ac9f4d4f89 100644
--- a/src/mscorlib/src/System/Security/Permissions/FileIOPermission.cs
+++ b/src/mscorlib/src/System/Security/Permissions/FileIOPermission.cs
@@ -967,7 +967,6 @@ namespace System.Security.Permissions {
                 }
             }
         }
-
     }
     
     [Serializable]
diff --git a/src/mscorlib/src/System/String.cs b/src/mscorlib/src/System/String.cs
index d9f40899b6..9d4dcde887 100644
--- a/src/mscorlib/src/System/String.cs
+++ b/src/mscorlib/src/System/String.cs
@@ -761,6 +761,8 @@ namespace System {
         }
 
 #if FEATURE_RANDOMIZED_STRING_HASHING
+        // Do not remove!
+        // This method is called by reflection in System.Xml
         [System.Security.SecurityCritical]
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
         internal static extern int InternalMarvin32HashString(string s, int strLen, long additionalEntropy);
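
A hedged sketch of the sort of reflection binding the comment above protects; the real call site is in System.Xml, but a lookup by name would look roughly like this:

    using System;
    using System.Reflection;

    static class MarvinLookup
    {
        static void Main()
        {
            MethodInfo mi = typeof(string).GetMethod(
                "InternalMarvin32HashString",
                BindingFlags.NonPublic | BindingFlags.Static);

            Console.WriteLine(mi != null ? "found" : "not present on this runtime");
        }
    }
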
diff --git a/src/mscorlib/src/System/Threading/ExecutionContext.cs b/src/mscorlib/src/System/Threading/ExecutionContext.cs
index 3eb895cb4c..3fc1e14908 100644
--- a/src/mscorlib/src/System/Threading/ExecutionContext.cs
+++ b/src/mscorlib/src/System/Threading/ExecutionContext.cs
@@ -1123,6 +1123,8 @@ namespace System.Threading
                 ec.IllogicalCallContext = (IllogicalCallContext)this.IllogicalCallContext.CreateCopy();
 #endif // #if FEATURE_REMOTING
 
+            ec._localValues = this._localValues;
+            ec._localChangeNotifications = this._localChangeNotifications;
             ec.isFlowSuppressed = this.isFlowSuppressed;
 
             return ec;
diff --git a/src/mscorlib/src/System/Threading/Tasks/Task.cs b/src/mscorlib/src/System/Threading/Tasks/Task.cs
index 2654e1ba79..ebd8c2b614 100644
--- a/src/mscorlib/src/System/Threading/Tasks/Task.cs
+++ b/src/mscorlib/src/System/Threading/Tasks/Task.cs
@@ -201,7 +201,8 @@ namespace System.Threading.Tasks
         // Values for ContingentProperties.m_internalCancellationRequested.
         private const int CANCELLATION_REQUESTED = 0x1;
 
-        // Can be null, a single continuation, a list of continuations, or s_taskCompletionSentinel.
+        // Can be null, a single continuation, a list of continuations, or s_taskCompletionSentinel,
+        // in that order. The logic around this object assumes it will never regress to a previous state.
         private volatile object m_continuationObject = null;
 
         // m_continuationObject is set to this when the task completes.
@@ -4737,23 +4738,12 @@ namespace System.Threading.Tasks
                     // continuation.
                     if (m_continuationObject != s_taskCompletionSentinel)
                     {
-                        // Before growing the list, we look to see whether any continuations in the list 
-                        // have already completed and thus can be removed.  This helps to avoid both unnecessary
-                        // growth to the list, but more importantly it helps to avoid temporary leaks when this
-                        // task is long-lived and a registered continuation has already completed yet is still
-                        // being kept alive.  This can happen in the case of a cancelable continuation.
+                        // Before growing the list we remove possible null entries that are the
+                        // result of RemoveContinuation().
                         if (list.Count == list.Capacity)
                         {
-                            for (int index = list.Count - 1; index >= 0; index--)
-                            {
-                                StandardTaskContinuation cont = list[index] as StandardTaskContinuation;
-                                if (cont != null && cont.m_task.IsCanceled)
-                                {
-                                    list.RemoveAt(index);
-                                }
-                            }
+                            list.RemoveAll(s_IsTaskContinuationNullPredicate);
                         }
-                    
 
                         if (addBeforeOthers)
                             list.Insert(0, tc);
@@ -4792,17 +4782,37 @@ namespace System.Threading.Tasks
         // Removes a continuation task from m_continuations
         internal void RemoveContinuation(object continuationObject) // could be TaskContinuation or Action<Task>
         {
-            // We need to snap a local reference to m_continuations
-            // because we could be racing w/ FinishContinuations() which nulls out m_continuationObject in the end
-            List<object> continuationsLocalRef = m_continuationObject as List<object>;
+            // We need to snap a local reference to m_continuationObject, since reading a volatile object repeatedly is more costly.
+            // It also prevents the value from changing under us as the result of a race with another thread.
+            object continuationsLocalRef = m_continuationObject;
+
+            // Task is completed. Nothing to do here.
+            if (continuationsLocalRef == s_taskCompletionSentinel) return;
+
+            List<object> continuationsLocalListRef = continuationsLocalRef as List<object>;
 
-            // If continuationsLocalRef == null, it means that m_continuationObject is not a list.  We only
-            // remove continuations if there are a list of them.  We take no action if there are no continuations
-            // (m_continuationObject == null), if we are tracking a single continuation (m_continuationObject is
-            // TaskContinuation or Action<Task>), or if the task is complete (m_continuationObject is s_taskCompletionSentinel).
-            if (continuationsLocalRef != null)
+            if (continuationsLocalListRef == null)
             {
-                lock (continuationsLocalRef)
+                // This is not a list. If we have a single object (the one we want to remove) we try to replace it with an empty list.
+                // Note we cannot go back to a null state, since it will mess up the AddTaskContinuation logic.
+                if (Interlocked.CompareExchange(ref m_continuationObject, new List<object>(), continuationObject) != continuationObject)
+                {
+                    // If we fail it means that either AddContinuationComplex won the race and m_continuationObject is now a List
+                    // that contains the element we want to remove, or FinishContinuations set s_taskCompletionSentinel.
+                    // So we should try to get a list one more time.
+                    continuationsLocalListRef = m_continuationObject as List<object>;
+                }
+                else
+                {
+                    // Exchange was successful so we can skip the last comparison
+                    return;
+                }
+            }
+
+            // If continuationsLocalListRef is still null it means s_taskCompletionSentinel has been set already and there is nothing else to do.
+            if (continuationsLocalListRef != null)
+            {
+                lock (continuationsLocalListRef)
                 {
                     // There is a small chance that this task completed since we took a local snapshot into
                     // continuationsLocalRef.  In that case, just return; we don't want to be manipulating the
@@ -4810,25 +4820,19 @@ namespace System.Threading.Tasks
                     if (m_continuationObject == s_taskCompletionSentinel) return;
 
                     // Find continuationObject in the continuation list
-                    int index = continuationsLocalRef.IndexOf(continuationObject);
+                    int index = continuationsLocalListRef.IndexOf(continuationObject);
 
                     if (index != -1)
                     {
                         // null out that TaskContinuation entry, which will be interpreted as "to be cleaned up"
-                        continuationsLocalRef[index] = null;
+                        continuationsLocalListRef[index] = null;
 
-                        // if the list of continuations is large enough it's time to compact it by removing
-                        // all entries marked for clean up
-                        if (continuationsLocalRef.Count > 128)
-                        {
-                            continuationsLocalRef.RemoveAll(s_IsTaskContinuationNullPredicate); // RemoveAll has better performance than doing it ourselves
-                        }
                     }
                 }
             }
         }
 
-        // statically allocated delegate for the RemoveAll expression in RemoveContinuations()
+        // statically allocated delegate for the RemoveAll expression in RemoveContinuations() and AddContinuationComplex()
         private readonly static Predicate<object> s_IsTaskContinuationNullPredicate =
             new Predicate<object>((tc) => { return (tc == null); });
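
A self-contained sketch (hypothetical type name) of the lock-free step RemoveContinuation now performs: if the field still holds the single continuation being removed, swap in an empty list with a compare-exchange; if that races with a concurrent add or with task completion, fall back to nulling the entry inside the list under its lock:

    using System.Collections.Generic;
    using System.Threading;

    class ContinuationSlotSketch
    {
        private object m_continuationObject;   // null, a single continuation, a List<object>, or a sentinel

        public void Remove(object continuation)
        {
            object local = m_continuationObject;
            List<object> list = local as List<object>;

            if (list == null)
            {
                // Try to atomically replace the lone continuation with an empty list.
                if (Interlocked.CompareExchange(ref m_continuationObject, new List<object>(), continuation) == continuation)
                    return;                                    // removed without taking a lock

                list = m_continuationObject as List<object>;   // lost the race; re-read
            }

            if (list != null)
            {
                lock (list)
                {
                    int index = list.IndexOf(continuation);
                    if (index != -1)
                        list[index] = null;                    // mark for later compaction
                }
            }
        }
    }
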
 
diff --git a/src/tools/crossgen/crossgen.nativeproj b/src/tools/crossgen/crossgen.nativeproj
index da37792489..5377194c76 100644
--- a/src/tools/crossgen/crossgen.nativeproj
+++ b/src/tools/crossgen/crossgen.nativeproj
@@ -41,9 +41,15 @@
       <ProjectReference>$(ClrSrcDirectory)zap\crossgen\zap_crossgen.nativeproj</ProjectReference>
     </TargetLib>
 
-    <TargetLib Condition="'$(MDILGenerator)' != 'true'" Include="$(ClrLibPath)\jit_crossgen.lib">
-      <ProjectReference Condition="'$(_BuildArch)' != 'amd64' and '$(_BuildArch)' != 'arm64'">$(ClrSrcDirectory)jit32\crossgen\jit_crossgen.nativeproj</ProjectReference>
+    <!-- In the CodeGen branch, we use RyuJIT for all JIT builds -->
+    <TargetLib Condition="'$(MDILGenerator)' != 'true' and '$(_BuildBranch)' == 'CodeGen'" Include="$(ClrLibPath)\jit_crossgen.lib">
+      <ProjectReference>$(ClrSrcDirectory)jit\crossgen\jit_crossgen.nativeproj</ProjectReference>
+    </TargetLib>
+
+    <!-- In other branches, we build RyuJIT only for amd64 and arm64, and use JIT32 for ARM and x86 -->
+    <TargetLib Condition="'$(MDILGenerator)' != 'true' and '$(_BuildBranch)' != 'CodeGen'" Include="$(ClrLibPath)\jit_crossgen.lib">
       <ProjectReference Condition="'$(_BuildArch)' == 'amd64' or  '$(_BuildArch)' == 'arm64'">$(ClrSrcDirectory)jit\crossgen\jit_crossgen.nativeproj</ProjectReference>
+      <ProjectReference Condition="'$(_BuildArch)' != 'amd64' and '$(_BuildArch)' != 'arm64'">$(ClrSrcDirectory)jit32\crossgen\jit_crossgen.nativeproj</ProjectReference>
     </TargetLib>
 
     <TargetLib Condition="'$(MDILGenerator)' == 'true'" Include="$(ClrLibPath)\jitmdil_crossgen.lib">
diff --git a/src/vm/ClrEtwAll.man b/src/vm/ClrEtwAll.man
index 944f8985d6..ea3c4da935 100644
--- a/src/vm/ClrEtwAll.man
+++ b/src/vm/ClrEtwAll.man
@@ -114,6 +114,10 @@
                             <opcode name="GCBulkRootStaticVar" message="$(string.RuntimePublisher.GCBulkRootStaticVarOpcodeMessage)" symbol="CLR_GC_BULKROOTSTATICVAR_OPCODE" value="40"> </opcode>
                             <opcode name="IncreaseMemoryPressure" message="$(string.RuntimePublisher.IncreaseMemoryPressureOpcodeMessage)" symbol="CLR_GC_INCREASEMEMORYPRESSURE_OPCODE" value="200"> </opcode>
                             <opcode name="DecreaseMemoryPressure" message="$(string.RuntimePublisher.DecreaseMemoryPressureOpcodeMessage)" symbol="CLR_GC_DECREASEMEMORYPRESSURE_OPCODE" value="201"> </opcode>
+                            <opcode name="GCMarkWithType" message="$(string.RuntimePublisher.GCMarkOpcodeMessage)" symbol="CLR_GC_MARK_OPCODE" value="202"> </opcode>
+                            <opcode name="GCJoin" message="$(string.RuntimePublisher.GCJoinOpcodeMessage)" symbol="CLR_GC_JOIN_OPCODE" value="203"> </opcode>
+                            <opcode name="GCPerHeapHistory" message="$(string.RuntimePublisher.GCPerHeapHistoryOpcodeMessage)" symbol="CLR_GC_GCPERHEAPHISTORY_OPCODE" value="204"> </opcode>
+                            <opcode name="GCGlobalHeapHistory" message="$(string.RuntimePublisher.GCGlobalHeapHistoryOpcodeMessage)" symbol="CLR_GC_GCGLOBALHEAPHISTORY_OPCODE" value="205"> </opcode>
                         </opcodes>
                     </task>
 
@@ -398,7 +402,9 @@
                         <map value="0" message="$(string.RuntimePublisher.GCRootKind.Stack)"/>
                         <map value="1" message="$(string.RuntimePublisher.GCRootKind.Finalizer)"/>
                         <map value="2" message="$(string.RuntimePublisher.GCRootKind.Handle)"/>
-                        <map value="3" message="$(string.RuntimePublisher.GCRootKind.Other)"/>
+                        <map value="3" message="$(string.RuntimePublisher.GCRootKind.Older)"/>
+                        <map value="4" message="$(string.RuntimePublisher.GCRootKind.SizedRef)"/>
+                        <map value="5" message="$(string.RuntimePublisher.GCRootKind.Overflow)"/>
                     </valueMap>
                     <valueMap name="GCHandleKindMap">
                       <map value="0x0" message="$(string.RuntimePublisher.GCHandleKind.WeakShortMessage)"/>
@@ -962,6 +968,115 @@
                       </UserData>
                     </template>
 
+                    <template tid="GCMarkWithType">
+                      <data name="HeapNum" inType="win:UInt32" />
+                      <data name="ClrInstanceID" inType="win:UInt16" />
+                      <data name="Type" inType="win:UInt32" map="GCRootKindMap" />
+                      <data name="Bytes" inType="win:UInt64" />
+
+                      <UserData>
+                        <GCMarkWithType xmlns="myNs">
+                          <HeapNum> %1 </HeapNum>
+                          <ClrInstanceID> %2 </ClrInstanceID>
+                          <Type> %3 </Type>
+                          <Bytes> %4 </Bytes>
+                        </GCMarkWithType>
+                      </UserData>
+                    </template>
+
+                    <template tid="GCJoin_V2">
+                        <data name="Heap" inType="win:UInt32" />
+                        <data name="JoinTime" inType="win:UInt32" />
+                        <data name="JoinType" inType="win:UInt32" />
+                        <data name="ClrInstanceID" inType="win:UInt16" />
+                        <data name="JoinID" inType="win:UInt32" />
+
+                        <UserData>
+                            <GCJoin_V2 xmlns="myNs">
+                                <Heap> %1 </Heap>
+                                <JoinTime> %2 </JoinTime>
+                                <JoinType> %3 </JoinType>
+                                <ClrInstanceID> %4 </ClrInstanceID>
+                                <JoinID> %5 </JoinID>
+                            </GCJoin_V2>
+                        </UserData>
+                    </template>
+
+                    <template tid="GCPerHeapHistory_V3">
+                        <data name="ClrInstanceID" inType="win:UInt16" />
+     					<data name="FreeListAllocated" inType="win:Pointer" outType="win:HexInt64" />
+     					<data name="FreeListRejected" inType="win:Pointer" outType="win:HexInt64" />
+     					<data name="EndOfSegAllocated" inType="win:Pointer" outType="win:HexInt64" />
+     					<data name="CondemnedAllocated" inType="win:Pointer" outType="win:HexInt64" />
+     					<data name="PinnedAllocated" inType="win:Pointer" outType="win:HexInt64" />
+     					<data name="PinnedAllocatedAdvance" inType="win:Pointer" outType="win:HexInt64" />
+     					<data name="RunningFreeListEfficiency" inType="win:UInt32" />
+     					<data name="CondemnReasons0" inType="win:UInt32" />
+     					<data name="CondemnReasons1" inType="win:UInt32" />
+     					<data name="CompactMechanisms" inType="win:UInt32" />
+     					<data name="ExpandMechanisms" inType="win:UInt32" />
+     					<data name="HeapIndex" inType="win:UInt32" />
+     					<data name="ExtraGen0Commit" inType="win:Pointer" outType="win:HexInt64" />
+                        <data name="Count" inType="win:UInt32"    />
+                        <struct name="Values"   count="Count"  >
+							<data name="SizeBefore" inType="win:Pointer" outType="win:HexInt64" />
+							<data name="FreeListBefore" inType="win:Pointer" outType="win:HexInt64" />
+							<data name="FreeObjBefore" inType="win:Pointer" outType="win:HexInt64" />
+							<data name="SizeAfter" inType="win:Pointer" outType="win:HexInt64" />
+							<data name="FreeListAfter" inType="win:Pointer" outType="win:HexInt64" />
+							<data name="FreeObjAfter" inType="win:Pointer" outType="win:HexInt64" />
+							<data name="In" inType="win:Pointer" outType="win:HexInt64" />
+							<data name="PinnedSurv" inType="win:Pointer" outType="win:HexInt64" />
+							<data name="NonePinnedSurv" inType="win:Pointer" outType="win:HexInt64" />
+							<data name="NewAllocation" inType="win:Pointer" outType="win:HexInt64" />
+                        </struct>
+                        <UserData>
+                            <GCPerHeapHistory_V3 xmlns="myNs">
+                                <ClrInstanceID> %1 </ClrInstanceID>
+                                <FreeListAllocated> %2 </FreeListAllocated>
+                                <FreeListRejected> %3 </FreeListRejected>
+                                <EndOfSegAllocated> %4 </EndOfSegAllocated>
+                                <CondemnedAllocated> %5 </CondemnedAllocated>
+                                <PinnedAllocated> %6 </PinnedAllocated>
+                                <PinnedAllocatedAdvance> %7 </PinnedAllocatedAdvance>
+                                <RunningFreeListEfficiency> %8 </RunningFreeListEfficiency>
+                                <CondemnReasons0> %9 </CondemnReasons0>
+                                <CondemnReasons1> %10 </CondemnReasons1>
+                                <CompactMechanisms> %11 </CompactMechanisms>
+                                <ExpandMechanisms> %12 </ExpandMechanisms>
+                                <HeapIndex> %13 </HeapIndex>
+                                <ExtraGen0Commit> %14 </ExtraGen0Commit>
+                                <Count> %15 </Count>
+                            </GCPerHeapHistory_V3>
+                        </UserData>
+                    </template>
+
+                    <template tid="GCGlobalHeap_V2">
+                        <data name="FinalYoungestDesired" inType="win:UInt64" outType="win:HexInt64" />
+                        <data name="NumHeaps" inType="win:Int32" />
+                        <data name="CondemnedGeneration" inType="win:UInt32" />
+                        <data name="Gen0ReductionCount" inType="win:UInt32" />
+                        <data name="Reason" inType="win:UInt32" />
+                        <data name="GlobalMechanisms" inType="win:UInt32" />
+                        <data name="ClrInstanceID" inType="win:UInt16" />
+                        <data name="PauseMode" inType="win:UInt32" />
+                        <data name="MemoryPressure" inType="win:UInt32" />
+
+                        <UserData>
+                            <GCGlobalHeap_V2 xmlns="myNs">
+                                <FinalYoungestDesired> %1 </FinalYoungestDesired>
+                                <NumHeaps> %2 </NumHeaps>
+                                <CondemnedGeneration> %3 </CondemnedGeneration>
+                                <Gen0ReductionCount> %4 </Gen0ReductionCount>
+                                <Reason> %5 </Reason>
+                                <GlobalMechanisms> %6 </GlobalMechanisms>
+                                <ClrInstanceID> %7 </ClrInstanceID>
+                                <PauseMode> %8 </PauseMode>
+                                <MemoryPressure> %9 </MemoryPressure>
+                            </GCGlobalHeap_V2>
+                        </UserData>
+                    </template>
+
                     <template tid="FinalizeObject">
                       <data name="TypeID" inType="win:Pointer" />
                       <data name="ObjectID" inType="win:Pointer" />
@@ -2297,7 +2412,7 @@
                            symbol="GCBulkMovedObjectRanges" message="$(string.RuntimePublisher.GCBulkMovedObjectRangesEventMessage)"/>
 
                     <event value="23" version="0" level="win:Informational"  template="GCGenerationRange"
-                           keywords ="GCKeyword"  opcode="GCGenerationRange"
+                           keywords ="GCHeapSurvivalAndMovementKeyword"  opcode="GCGenerationRange"
                            task="GarbageCollection"
                            symbol="GCGenerationRange" message="$(string.RuntimePublisher.GCGenerationRangeEventMessage)"/>
                   
@@ -2895,6 +3010,26 @@
                            task="GarbageCollection"
                            symbol="DecreaseMemoryPressure" message="$(string.RuntimePublisher.DecreaseMemoryPressureEventMessage)"/>
 
+                    <event value="202" version="0" level="win:Informational"  template="GCMarkWithType"
+                           keywords ="GCKeyword"  opcode="GCMarkWithType"
+                           task="GarbageCollection"
+                           symbol="GCMarkWithType" message="$(string.RuntimePublisher.GCMarkWithTypeEventMessage)"/>
+
+                    <event value="203" version="2" level="win:Verbose"  template="GCJoin_V2"
+                           keywords ="GCKeyword"  opcode="GCJoin"
+                           task="GarbageCollection"
+                           symbol="GCJoin_V2" message="$(string.RuntimePublisher.GCJoin_V2EventMessage)"/>
+
+                    <event value="204" version="3" level="win:Informational"  template="GCPerHeapHistory_V3"
+                           keywords ="GCKeyword"  opcode="GCPerHeapHistory"
+                           task="GarbageCollection"
+                           symbol="GCPerHeapHistory_V3" message="$(string.RuntimePublisher.GCPerHeapHistory_V3EventMessage)"/>
+
+                    <event value="205" version="2" level="win:Informational"  template="GCGlobalHeap_V2"
+                           keywords ="GCKeyword"  opcode="GCGlobalHeapHistory"
+                           task="GarbageCollection"
+                           symbol="GCGlobalHeapHistory_V2" message="$(string.RuntimePublisher.GCGlobalHeap_V2EventMessage)"/>
+
                     <!-- CLR Debugger events 240-249 -->
                     <event value="240" version="0" level="win:Informational"
                            keywords="DebuggerKeyword" opcode="win:Start"
@@ -5945,6 +6080,10 @@
                 <string id="RuntimePublisher.GCMarkFinalizeQueueRootsEventMessage" value="HeapNum=%1;%nClrInstanceID=%2"/>
                 <string id="RuntimePublisher.GCMarkHandlesEventMessage" value="HeapNum=%1;%nClrInstanceID=%2"/>
                 <string id="RuntimePublisher.GCMarkOlderGenerationRootsEventMessage" value="HeapNum=%1;%nClrInstanceID=%2"/>
+                <string id="RuntimePublisher.GCMarkWithTypeEventMessage" value="HeapNum=%1;%nClrInstanceID=%2;%nType=%3;%nBytes=%4"/>
+                <string id="RuntimePublisher.GCJoin_V2EventMessage" value="Heap=%1;%nJoinTime=%2;%nJoinType=%3;%nClrInstanceID=%4;%nJoinID=%5"/>
+                <string id="RuntimePublisher.GCPerHeapHistory_V3EventMessage" value="ClrInstanceID=%1;%nFreeListAllocated=%2;%nFreeListRejected=%3;%nEndOfSegAllocated=%4;%nCondemnedAllocated=%5;%nPinnedAllocated=%6;%nPinnedAllocatedAdvance=%7;%RunningFreeListEfficiency=%8;%nCondemnReasons0=%9;%nCondemnReasons1=%10;%nCompactMechanisms=%11;%nExpandMechanisms=%12;%nHeapIndex=%13;%nExtraGen0Commit=%14;%nCount=%15"/>
+                <string id="RuntimePublisher.GCGlobalHeap_V2EventMessage" value="FinalYoungestDesired=%1;%nNumHeaps=%2;%nCondemnedGeneration=%3;%nGen0ReductionCountD=%4;%nReason=%5;%nGlobalMechanisms=%6;%nClrInstanceID=%7;%nPauseMode=%8;%nMemoryPressure=%9"/>
                 <string id="RuntimePublisher.FinalizeObjectEventMessage" value="TypeID=%1;%nObjectID=%2;%nClrInstanceID=%3" />
                 <string id="RuntimePublisher.GCTriggeredEventMessage" value="Reason=%1" />
                 <string id="RuntimePublisher.PinObjectAtGCTimeEventMessage" value="HandleID=%1;%nObjectID=%2;%nObjectSize=%3;%nTypeName=%4;%n;%nClrInstanceID=%5" />
@@ -6296,7 +6435,9 @@
                 <string id="RuntimePublisher.GCRootKind.Stack" value="Stack" />
                 <string id="RuntimePublisher.GCRootKind.Finalizer" value="Finalizer" />
                 <string id="RuntimePublisher.GCRootKind.Handle" value="Handle" />
-                <string id="RuntimePublisher.GCRootKind.Other" value="Other" />
+                <string id="RuntimePublisher.GCRootKind.Older" value="Older" />
+                <string id="RuntimePublisher.GCRootKind.SizedRef" value="SizedRef" />
+                <string id="RuntimePublisher.GCRootKind.Overflow" value="Overflow" />
                 <string id="RuntimePublisher.Startup.CONCURRENT_GCMapMessage" value="CONCURRENT_GC" />
                 <string id="RuntimePublisher.Startup.LOADER_OPTIMIZATION_SINGLE_DOMAINMapMessage" value="LOADER_OPTIMIZATION_SINGLE_DOMAIN" />
                 <string id="RuntimePublisher.Startup.LOADER_OPTIMIZATION_MULTI_DOMAINMapMessage" value="LOADER_OPTIMIZATION_MULTI_DOMAIN" />
@@ -6523,6 +6664,10 @@
                 <string id="RuntimePublisher.GCMarkHandlesOpcodeMessage" value="MarkHandles" />
                 <string id="RuntimePublisher.GCMarkFinalizeQueueRootsOpcodeMessage" value="MarkFinalizeQueueRoots" />
                 <string id="RuntimePublisher.GCMarkOlderGenerationRootsOpcodeMessage" value="MarkCards" />
+                <string id="RuntimePublisher.GCMarkOpcodeMessage" value="Mark" />
+                <string id="RuntimePublisher.GCJoinOpcodeMessage" value="GCJoin" />
+                <string id="RuntimePublisher.GCPerHeapHistoryOpcodeMessage" value="PerHeapHistory" />
+                <string id="RuntimePublisher.GCGlobalHeapHistoryOpcodeMessage" value="GlobalHeapHistory" />
                 <string id="RuntimePublisher.FinalizeObjectOpcodeMessage" value="FinalizeObject" />
                 <string id="RuntimePublisher.BulkTypeOpcodeMessage" value="BulkType" />
                 <string id="RuntimePublisher.MethodLoadOpcodeMessage" value="Load" />
diff --git a/src/vm/ClrEtwAllMeta.lst b/src/vm/ClrEtwAllMeta.lst
index 355fe08601..baa138dedf 100644
--- a/src/vm/ClrEtwAllMeta.lst
+++ b/src/vm/ClrEtwAllMeta.lst
@@ -101,6 +101,8 @@ nomac:GarbageCollection:::GCMarkHandles
 nostack:GarbageCollection:::GCMarkHandles
 nomac:GarbageCollection:::GCMarkOlderGenerationRoots
 nostack:GarbageCollection:::GCMarkOlderGenerationRoots
+nomac:GarbageCollection:::GCMarkWithType
+nostack:GarbageCollection:::GCMarkWithType
 nostack:GarbageCollection:::PinObjectAtGCTime
 nostack:GarbageCollection:::FinalizeObject
 nostack:GarbageCollection:::GCGenerationRange
@@ -113,6 +115,11 @@ nostack:GarbageCollection:::GCBulkMovedObjectRanges
 nostack:GarbageCollection:::GCBulkRootCCW
 nostack:GarbageCollection:::GCBulkRCW
 nostack:GarbageCollection:::GCBulkRootStaticVar
+nomac:GarbageCollection:::GCPerHeapHistory_V3
+nostack:GarbageCollection:::GCPerHeapHistory_V3
+nomac:GarbageCollection:::GCGlobalHeap_V2
+nostack:GarbageCollection:::GCGlobalHeap_V2
+nomac:GarbageCollection:::GCJoin_V2
 
 #############
 # Type events
diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp
index 123b415141..8721b54714 100644
--- a/src/vm/ceemain.cpp
+++ b/src/vm/ceemain.cpp
@@ -882,10 +882,7 @@ void EEStartupHelper(COINITIEE fFlags)
 
         // Fire the EE startup ETW event
         ETWFireEvent(EEStartupStart_V1);
-
-        // Fire the runtime information ETW event
-        ETW::InfoLog::RuntimeInformation(ETW::InfoLog::InfoStructs::Normal);
-#endif // FEATURE_EVENT_TRACE        
+#endif // FEATURE_EVENT_TRACE
 
 #ifdef FEATURE_IPCMAN
         // Give PerfMon a chance to hook up to us
@@ -954,6 +951,9 @@ void EEStartupHelper(COINITIEE fFlags)
             IfFailGoLog(g_pConfig->sync());        
         }
 
+        // Fire the runtime information ETW event
+        ETW::InfoLog::RuntimeInformation(ETW::InfoLog::InfoStructs::Normal);
+
         if (breakOnEELoad.val(CLRConfig::UNSUPPORTED_BreakOnEELoad) == 1)
         {
 #ifdef _DEBUG
diff --git a/src/vm/codeman.cpp b/src/vm/codeman.cpp
index bd9a82f270..77efda3d92 100644
--- a/src/vm/codeman.cpp
+++ b/src/vm/codeman.cpp
@@ -1518,10 +1518,6 @@ BOOL EEJitManager::LoadJIT()
 
     bool fUseRyuJit = (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_UseRyuJit) == 1); // uncached access, since this code is run no more than one time
 
-    // ****** TODO: Until the registry value is set by the .NET 4.6 installer, we pretend .NET 4.6 has been installed, which causes
-    // ******       RyuJit to be used by default.
-    fUseRyuJit = true;
-
     if ((!IsCompilationProcess() || !fUseRyuJit) &&     // Use RyuJIT for all NGEN, unless we're falling back to JIT64 for everything.
         (newJitCompiler != nullptr))    // the main JIT must successfully load before we try loading the fallback JIT
     {
diff --git a/src/vm/eventtrace.cpp b/src/vm/eventtrace.cpp
index ccbd32d46b..c55c378005 100644
--- a/src/vm/eventtrace.cpp
+++ b/src/vm/eventtrace.cpp
@@ -4741,6 +4741,27 @@ VOID ETW::InfoLog::RuntimeInformation(INT32 type)
 
 #ifndef FEATURE_CORECLR
             startupFlags = CorHost2::GetStartupFlags();
+
+            // Some of the options specified by the startup flags can be overridden by config files.
+            // Strictly speaking, since the field in this event is called StartupFlags, there is
+            // nothing wrong with reporting the raw startup flags, but doing so makes the event
+            // less useful (a more appropriate name for the field would be StartupOptions).
+            startupFlags &= ~STARTUP_CONCURRENT_GC;
+            if (g_pConfig->GetGCconcurrent())
+                startupFlags |= STARTUP_CONCURRENT_GC;
+
+            if (g_pConfig->DefaultSharePolicy() != AppDomain::SHARE_POLICY_UNSPECIFIED)
+            {
+                startupFlags &= ~STARTUP_LOADER_OPTIMIZATION_MASK;
+                startupFlags |= g_pConfig->DefaultSharePolicy() << 1;
+            }
+
+            startupFlags &= ~STARTUP_LEGACY_IMPERSONATION;
+            startupFlags &= ~STARTUP_ALWAYSFLOW_IMPERSONATION;
+            if (g_pConfig->ImpersonationMode() == IMP_NOFLOW)
+                startupFlags |= STARTUP_LEGACY_IMPERSONATION;
+            else if (g_pConfig->ImpersonationMode() == IMP_ALWAYSFLOW)
+                startupFlags |= STARTUP_ALWAYSFLOW_IMPERSONATION;
 #endif //!FEATURE_CORECLR
 
             // Determine the startupmode
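The share-policy override in the block above depends on the loader-optimization startup flags occupying a small bit field starting at bit 1, which is why the configured policy is shifted left by one. A self-contained sketch of that correspondence, with the numeric values assumed from the hosting startup-flags enum and AppDomain's share-policy enum rather than shown in this patch:

// Sketch only: the values below are assumptions mirroring the hosting startup-flags
// enum and the AppDomain share-policy enum; real code uses the runtime's definitions.
typedef unsigned int SketchFlags;

const SketchFlags SKETCH_LOADER_OPTIMIZATION_MASK = 0x3 << 1;   // bits 1-2

enum SketchSharePolicy
{
    SKETCH_SHARE_POLICY_NEVER  = 1,   // assumed to map to SINGLE_DOMAIN      (0x1 << 1)
    SKETCH_SHARE_POLICY_ALWAYS = 2,   // assumed to map to MULTI_DOMAIN       (0x2 << 1)
    SKETCH_SHARE_POLICY_GAC    = 3    // assumed to map to MULTI_DOMAIN_HOST  (0x3 << 1)
};

// Mirrors the override above: clear the loader-optimization bit field, then shift the
// configured share policy into bits 1-2 so it lines up with the startup flag values.
SketchFlags ApplySharePolicy(SketchFlags startupFlags, SketchSharePolicy policy)
{
    startupFlags &= ~SKETCH_LOADER_OPTIMIZATION_MASK;
    startupFlags |= ((SketchFlags)policy) << 1;
    return startupFlags;
}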
@@ -6391,12 +6412,105 @@ VOID ETW::MethodLog::SendEventsForNgenMethods(Module *pModule, DWORD dwEventOpti
 #endif // FEATURE_PREJIT
 }
 
+// Called by ETW::MethodLog::SendEventsForJitMethods
+// Sends the ETW events once our caller determines whether or not rejit locks can be acquired
+VOID ETW::MethodLog::SendEventsForJitMethodsHelper(BaseDomain *pDomainFilter,
+                                                   LoaderAllocator *pLoaderAllocatorFilter,
+                                                   DWORD dwEventOptions,
+                                                   BOOL fLoadOrDCStart,
+                                                   BOOL fUnloadOrDCEnd,
+                                                   BOOL fSendMethodEvent,
+                                                   BOOL fSendILToNativeMapEvent,
+                                                   BOOL fGetReJitIDs)
+{
+    CONTRACTL{
+        THROWS;
+        GC_NOTRIGGER;
+    } CONTRACTL_END;
+
+    EEJitManager::CodeHeapIterator heapIterator(pDomainFilter, pLoaderAllocatorFilter);
+    while (heapIterator.Next())
+    {
+        MethodDesc * pMD = heapIterator.GetMethod();
+        if (pMD == NULL)
+            continue;
+
+        TADDR codeStart = heapIterator.GetMethodCode();
+
+        // Grab rejitID from the rejit manager. In some cases, such as collectible loader
+        // allocators, we don't support rejit so we need to short circuit the call.
+        // This also allows our caller to avoid having to pre-enter the rejit
+        // manager locks.
+        // see code:#TableLockHolder
+        ReJITID rejitID =
+            fGetReJitIDs ? pMD->GetReJitManager()->GetReJitIdNoLock(pMD, codeStart) : 0;
+
+        // There are small windows of time where the heap iterator may come across a
+        // codeStart that is not yet published to the MethodDesc. This may happen if
+        // we're JITting the method right now on another thread, and have not completed
+        // yet. Detect the race, and skip the method if appropriate. (If rejitID is
+        // nonzero, there is no race, as GetReJitIdNoLock will not return a nonzero
+        // rejitID if the codeStart has not yet been published for that rejitted version
+        // of the method.) This check also catches recompilations due to EnC, which we do
+        // not want to issue events for, in order to ensure xperf's assumption that
+        // MethodDesc* + ReJITID + extent (hot vs. cold) form a unique key for code
+        // ranges of methods
+        if ((rejitID == 0) && (codeStart != PCODEToPINSTR(pMD->GetNativeCode())))
+            continue;
+
+        // When we're called to announce loads, then the methodload event itself must
+        // precede any supplemental events, so that the method load or method jitting
+        // event is the first event the profiler sees for that MethodID (and not, say,
+        // the MethodILToNativeMap event.)
+        if (fLoadOrDCStart)
+        {
+            if (fSendMethodEvent)
+            {
+                ETW::MethodLog::SendMethodEvent(
+                    pMD,
+                    dwEventOptions,
+                    TRUE,           // bIsJit
+                    NULL,           // namespaceOrClassName
+                    NULL,           // methodName
+                    NULL,           // methodSignature
+                    codeStart,
+                    rejitID);
+            }
+        }
+
+        // Send any supplemental events requested for this MethodID
+        if (fSendILToNativeMapEvent)
+            ETW::MethodLog::SendMethodILToNativeMapEvent(pMD, dwEventOptions, rejitID);
+
+        // When we're called to announce unloads, then the methodunload event itself must
+        // come after any supplemental events, so that the method unload event is the
+        // last event the profiler sees for this MethodID
+        if (fUnloadOrDCEnd)
+        {
+            if (fSendMethodEvent)
+            {
+                ETW::MethodLog::SendMethodEvent(
+                    pMD,
+                    dwEventOptions,
+                    TRUE,           // bIsJit
+                    NULL,           // namespaceOrClassName
+                    NULL,           // methodName
+                    NULL,           // methodSignature
+                    codeStart,
+                    rejitID);
+            }
+        }
+    }
+}
+
 /****************************************************************************/
 /* This routine sends back method events of type 'dwEventOptions', for all 
    JITed methods in either a given LoaderAllocator (if pLoaderAllocatorFilter is non NULL) 
    or in a given Domain (if pDomainFilter is non NULL) or for
    all methods (if both filters are null) */ 
 /****************************************************************************/
+// Code review indicates this method is never called with both filters NULL. Ideally we would
+// assert this and update the comment above, but since I am making this change late in the release, I am being cautious.
 VOID ETW::MethodLog::SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAllocator *pLoaderAllocatorFilter, DWORD dwEventOptions)
 {
     CONTRACTL {
@@ -6424,9 +6538,6 @@ VOID ETW::MethodLog::SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAl
                 (ETW::EnumerationLog::EnumerationStructs::MethodDCStartILToNativeMap |
                 ETW::EnumerationLog::EnumerationStructs::MethodDCEndILToNativeMap)) != 0;
 
-        BOOL fCollectibleLoaderAllocatorFilter = 
-            ((pLoaderAllocatorFilter != NULL) && (pLoaderAllocatorFilter->IsCollectible()));
-
         if (fSendILToNativeMapEvent)
         {
             // The call to SendMethodILToNativeMapEvent assumes that the debugger's lazy
@@ -6439,83 +6550,48 @@ VOID ETW::MethodLog::SendEventsForJitMethods(BaseDomain *pDomainFilter, LoaderAl
             g_pDebugInterface->InitializeLazyDataIfNecessary();
         }
 
-        // GetRejitIdNoLock requires that the rejit lock is taken already. We need to take
-        // it here, before CodeHeapIterator takes the SingleUseLock because that is defined
-        // ordering.
-        ReJitManager::TableLockHolder lksharedRejitMgrModule(SharedDomain::GetDomain()->GetReJitManager());
-        ReJitManager::TableLockHolder lkRejitMgrModule(pDomainFilter->GetReJitManager());
-        EEJitManager::CodeHeapIterator heapIterator(pDomainFilter, pLoaderAllocatorFilter);
-        while(heapIterator.Next())
+        // #TableLockHolder:
+        // 
+        // A word about ReJitManager::TableLockHolder... As we enumerate through the functions,
+        // we may need to grab their ReJITIDs. The ReJitManager grabs its table Crst in order to
+        // fetch these. However, several other kinds of locks are being taken during this
+        // enumeration, such as the SystemDomain lock and the EEJitManager::CodeHeapIterator's
+        // lock. In order to avoid lock-leveling issues, we grab the appropriate ReJitManager
+        // table locks after SystemDomain and before CodeHeapIterator. In particular, we need to
+        // grab the SharedDomain's ReJitManager table lock as well as the specific AppDomain's
+        // ReJitManager table lock for the current AppDomain we're iterating. Why the SharedDomain's
+        // ReJitManager lock? For any given AppDomain we're iterating over, the MethodDescs we
+        // find may be managed by that AppDomain's ReJitManger OR the SharedDomain's ReJitManager.
+        // (This is due to generics and whether given instantiations may be shared based on their
+        // arguments.) Therefore, we proactively take the SharedDomain's ReJitManager's table
+        // lock up front, and then individually take the appropriate AppDomain's ReJitManager's
+        // table lock that corresponds to the domain or module we're currently iterating over.
+        //
+
+        // We only support getting rejit IDs when filtering by domain.
+        if (pDomainFilter)
+        {
+            ReJitManager::TableLockHolder lkRejitMgrSharedDomain(SharedDomain::GetDomain()->GetReJitManager());
+            ReJitManager::TableLockHolder lkRejitMgrModule(pDomainFilter->GetReJitManager());
+            SendEventsForJitMethodsHelper(pDomainFilter,
+                pLoaderAllocatorFilter,
+                dwEventOptions,
+                fLoadOrDCStart,
+                fUnloadOrDCEnd,
+                fSendMethodEvent,
+                fSendILToNativeMapEvent,
+                TRUE);
+        }
+        else
         {
-            MethodDesc * pMD = heapIterator.GetMethod();
-            if (pMD == NULL)
-                continue;
-
-            TADDR codeStart = heapIterator.GetMethodCode();
-        
-            // Grab rejitID from the rejit manager. Short-circuit the call if we're filtering
-            // by a collectible loader allocator, since rejit is not supported on RefEmit
-            // assemblies.
-            ReJITID rejitID = 
-                fCollectibleLoaderAllocatorFilter ?
-                0 :
-                pMD->GetReJitManager()->GetReJitIdNoLock(pMD, codeStart);
-
-            // There are small windows of time where the heap iterator may come across a
-            // codeStart that is not yet published to the MethodDesc. This may happen if
-            // we're JITting the method right now on another thread, and have not completed
-            // yet. Detect the race, and skip the method if appropriate. (If rejitID is
-            // nonzero, there is no race, as GetReJitIdNoLock will not return a nonzero
-            // rejitID if the codeStart has not yet been published for that rejitted version
-            // of the method.) This check also catches recompilations due to EnC, which we do
-            // not want to issue events for, in order to ensure xperf's assumption that
-            // MethodDesc* + ReJITID + extent (hot vs. cold) form a unique key for code
-            // ranges of methods
-            if ((rejitID == 0) && (codeStart != PCODEToPINSTR(pMD->GetNativeCode())))
-                continue;
-
-            // When we're called to announce loads, then the methodload event itself must
-            // precede any supplemental events, so that the method load or method jitting
-            // event is the first event the profiler sees for that MethodID (and not, say,
-            // the MethodILToNativeMap event.)
-            if (fLoadOrDCStart)
-            {
-                if (fSendMethodEvent)
-                {
-                    ETW::MethodLog::SendMethodEvent(
-                        pMD, 
-                        dwEventOptions, 
-                        TRUE,           // bIsJit
-                        NULL,           // namespaceOrClassName
-                        NULL,           // methodName
-                        NULL,           // methodSignature
-                        codeStart,
-                        rejitID);
-                }
-            }
-
-            // Send any supplemental events requested for this MethodID
-            if (fSendILToNativeMapEvent)
-                ETW::MethodLog::SendMethodILToNativeMapEvent(pMD, dwEventOptions, rejitID);
-
-            // When we're called to announce unloads, then the methodunload event itself must
-            // come after any supplemental events, so that the method unload event is the
-            // last event the profiler sees for this MethodID
-            if (fUnloadOrDCEnd)
-            {
-                if (fSendMethodEvent)
-                {
-                    ETW::MethodLog::SendMethodEvent(
-                        pMD, 
-                        dwEventOptions, 
-                        TRUE,           // bIsJit
-                        NULL,           // namespaceOrClassName
-                        NULL,           // methodName
-                        NULL,           // methodSignature
-                        codeStart,
-                        rejitID);
-                }
-            }
+            SendEventsForJitMethodsHelper(pDomainFilter,
+                pLoaderAllocatorFilter,
+                dwEventOptions,
+                fLoadOrDCStart,
+                fUnloadOrDCEnd,
+                fSendMethodEvent,
+                fSendILToNativeMapEvent,
+                FALSE);
         }
     } EX_CATCH{} EX_END_CATCH(SwallowAllExceptions);
 #endif // !DACCESS_COMPILE
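The lock ordering prescribed by the #TableLockHolder comment above can be read directly off the domain-filtered branch; the sketch below restates it in isolation and is purely illustrative.

// Acquisition order, outermost first (mirrors the pDomainFilter branch above):
//   1. SharedDomain's ReJitManager table lock (shared generic instantiations may be
//      managed there),
//   2. the filtered AppDomain's ReJitManager table lock,
//   3. only then the EEJitManager::CodeHeapIterator, whose lock must rank below both.
{
    ReJitManager::TableLockHolder lkShared(SharedDomain::GetDomain()->GetReJitManager());
    ReJitManager::TableLockHolder lkDomain(pDomainFilter->GetReJitManager());
    EEJitManager::CodeHeapIterator heapIterator(pDomainFilter, pLoaderAllocatorFilter);
    while (heapIterator.Next())
    {
        // GetReJitIdNoLock is safe here because both table locks are already held.
    }
}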
@@ -6859,6 +6935,12 @@ VOID ETW::EnumerationLog::EnumerationHelper(Module *moduleFilter, BaseDomain *do
         // Thus hitting a timeout due to a large number of methods will not affect modules rundown.
         ETW::EnumerationLog::IterateModule(moduleFilter, enumerationOptions);
 
+        // As best I can tell from code review, these if statements below are never true. There is
+        // only one caller of this method that specifies a moduleFilter, ETW::LoaderLog::ModuleLoad,
+        // and that method never specifies these flags. Because it is late in the release cycle I am
+        // not making a change, but if you see this comment early in the next release cycle, consider
+        // deleting this apparently dead code.
+
         // DC End or Unload Jit Method events from all Domains
         if (enumerationOptions & ETW::EnumerationLog::EnumerationStructs::JitMethodUnloadOrDCEndAny)
         {
@@ -6895,6 +6977,7 @@ VOID ETW::EnumerationLog::EnumerationHelper(Module *moduleFilter, BaseDomain *do
                     ETW::EnumerationLog::IterateAppDomain(pDomain, enumerationOptions);
                 }
             }
+
             ETW::EnumerationLog::IterateDomain(SharedDomain::GetDomain(), enumerationOptions);
         }    
     }    
diff --git a/src/vm/gcinfodecoder.cpp b/src/vm/gcinfodecoder.cpp
index 3fd6ca2fce..2d886d4ad4 100644
--- a/src/vm/gcinfodecoder.cpp
+++ b/src/vm/gcinfodecoder.cpp
@@ -53,6 +53,22 @@
     } while (0)
 #endif // !VALIDATE_ROOT
 
+#ifndef LOG_PIPTR
+#define LOG_PIPTR(pObjRef, gcFlags, hCallBack)                                                                                                  \
+    {                                                                                                                                           \
+        GCCONTEXT* pGCCtx = (GCCONTEXT*)(hCallBack);                                                                                            \
+        if (pGCCtx->sc->promotion)                                                                                                              \
+        {                                                                                                                                       \
+            LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */                                                                                      \
+                LOG_PIPTR_OBJECT_CLASS(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR)))); \
+        }                                                                                                                                       \
+        else                                                                                                                                    \
+        {                                                                                                                                       \
+            LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */                                                                                      \
+                LOG_PIPTR_OBJECT(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR))));       \
+        }                                                                                                                                       \
+    }
+#endif // !LOG_PIPTR
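Because the block above is guarded by #ifndef LOG_PIPTR, a consumer that compiles the decoder without the GC logging plumbing can supply its own definition before this point. A minimal, purely illustrative no-op override:

// Hypothetical override, defined before gcinfodecoder.cpp is compiled; the default
// definition above is then skipped by the #ifndef guard.
#define LOG_PIPTR(pObjRef, gcFlags, hCallBack) do { } while (0)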
 
 bool GcInfoDecoder::SetIsInterruptibleCB (UINT32 startOffset, UINT32 stopOffset, LPVOID hCallback)
 {
@@ -1559,8 +1575,7 @@ void GcInfoDecoder::ReportRegisterToGC(  // AMD64
 
     VALIDATE_ROOT((gcFlags & GC_CALL_INTERIOR), hCallBack, pObjRef);
 
-    LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */
-         LOG_PIPTR_OBJECT_CLASS(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR))));
+    LOG_PIPTR(pObjRef, gcFlags, hCallBack);
 #endif //_DEBUG
 
     gcFlags |= CHECK_APP_DOMAIN;
@@ -1657,8 +1672,7 @@ void GcInfoDecoder::ReportRegisterToGC(  // ARM
 
     VALIDATE_ROOT((gcFlags & GC_CALL_INTERIOR), hCallBack, pObjRef);
 
-    LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */
-         LOG_PIPTR_OBJECT_CLASS(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR))));
+    LOG_PIPTR(pObjRef, gcFlags, hCallBack);
 #endif //_DEBUG
 
     gcFlags |= CHECK_APP_DOMAIN;
@@ -1752,8 +1766,7 @@ void GcInfoDecoder::ReportRegisterToGC( // ARM64
 
     VALIDATE_ROOT((gcFlags & GC_CALL_INTERIOR), hCallBack, pObjRef);
 
-    LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */
-         LOG_PIPTR_OBJECT_CLASS(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR))));
+    LOG_PIPTR(pObjRef, gcFlags, hCallBack);
 #endif //_DEBUG
 
     gcFlags |= CHECK_APP_DOMAIN;
@@ -1893,8 +1906,7 @@ void GcInfoDecoder::ReportStackSlotToGC(
 
     VALIDATE_ROOT((gcFlags & GC_CALL_INTERIOR), hCallBack, pObjRef);
 
-    LOG((LF_GCROOTS, LL_INFO1000, /* Part Three */
-         LOG_PIPTR_OBJECT_CLASS(OBJECTREF_TO_UNCHECKED_OBJECTREF(*pObjRef), (gcFlags & GC_CALL_PINNED), (gcFlags & GC_CALL_INTERIOR))));
+    LOG_PIPTR(pObjRef, gcFlags, hCallBack);
 #endif
 
     gcFlags |= CHECK_APP_DOMAIN;
diff --git a/src/vm/jitinterface.cpp b/src/vm/jitinterface.cpp
index c12351adc5..a50590aac9 100644
--- a/src/vm/jitinterface.cpp
+++ b/src/vm/jitinterface.cpp
@@ -7850,7 +7850,6 @@ CorInfoInline CEEInfo::canInline (CORINFO_METHOD_HANDLE hCaller,
         }
     }
 
-
 #ifdef PROFILING_SUPPORTED
     if (CORProfilerPresent())
     {
diff --git a/src/vm/rejit.cpp b/src/vm/rejit.cpp
index 33ec16b83f..0233da062e 100644
--- a/src/vm/rejit.cpp
+++ b/src/vm/rejit.cpp
@@ -1498,9 +1498,16 @@ HRESULT ReJitManager::DoJumpStampIfNecessary(MethodDesc* pMD, PCODE pCode)
     pInfoToJumpStamp = FindPreReJittedReJitInfo(beginIter, endIter);
     if (pInfoToJumpStamp != NULL)
     {
-        // Found it.  Jump-stamp, SetNativeCode, and we're done.
         _ASSERTE(pInfoToJumpStamp->GetMethodDesc() == pMD);
-        return pInfoToJumpStamp->JumpStampNativeCode(pCode);
+        // does it need to be jump-stamped?
+        if (pInfoToJumpStamp->GetState() != ReJitInfo::kJumpNone)
+        {
+            return S_OK;
+        }
+        else
+        {
+            return pInfoToJumpStamp->JumpStampNativeCode(pCode);
+        }
     }
 
     // In this case, try looking up by module / metadata token.  This is the case where
@@ -1521,6 +1528,19 @@ HRESULT ReJitManager::DoJumpStampIfNecessary(MethodDesc* pMD, PCODE pCode)
         return S_OK;
     }
 
+    // The placeholder may already have a rejit info for this MD, in which
+    // case we don't need to do any additional work
+    for (ReJitInfo * pInfo = pInfoPlaceholder->m_pShared->GetMethods(); pInfo != NULL; pInfo = pInfo->m_pNext)
+    {
+        if ((pInfo->GetKey().m_keyType == ReJitInfo::Key::kMethodDesc) &&
+            (pInfo->GetMethodDesc() == pMD))
+        {
+            // Any rejit info we find should already be jumpstamped
+            _ASSERTE(pInfo->GetState() != ReJitInfo::kJumpNone);
+            return S_OK;
+        }
+    }
+
 #ifdef _DEBUG
     {
         Module * pModuleTest = NULL;
diff --git a/src/zap/zapimage.cpp b/src/zap/zapimage.cpp
index 1439bbad11..67bad6d1ee 100644
--- a/src/zap/zapimage.cpp
+++ b/src/zap/zapimage.cpp
@@ -1147,6 +1147,76 @@ HANDLE ZapImage::GenerateFile(LPCWSTR wszOutputFileName, CORCOMPILE_NGEN_SIGNATU
     return hFile;
 }
 
+#ifdef FEATURE_FUSION
+#define WOF_PROVIDER_FILE           (0x00000002)
+
+typedef BOOL (WINAPI *WofShouldCompressBinaries_t) (
+    __in LPCWSTR Volume,
+    __out PULONG Algorithm
+    );
+
+typedef HRESULT (WINAPI *WofSetFileDataLocation_t) (
+    __in HANDLE hFile,
+    __in ULONG Provider,
+    __in PVOID FileInfo,
+    __in ULONG Length
+    );
+
+typedef struct _WOF_FILE_COMPRESSION_INFO {
+    ULONG Algorithm;
+} WOF_FILE_COMPRESSION_INFO, *PWOF_FILE_COMPRESSION_INFO;
+
+// Check if files on the volume identified by volumeLetter should be compressed.
+// If yes, compress the file associated with hFile.
+static void CompressFile(WCHAR volumeLetter, HANDLE hFile)
+{
+    if (IsNgenOffline())
+    {
+        return;
+    }
+
+    // Wofutil.dll is available on Windows 8.1 and above. Return on platforms without wofutil.dll.
+    HModuleHolder wofLibrary(WszLoadLibraryEx(L"wofutil.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32));
+    if (wofLibrary == nullptr)
+    {
+        return;
+    }
+
+    // WofShouldCompressBinaries is available on Windows 10 and above.
+    // Windows 8.1 version of wofutil.dll does not have this function.
+    WofShouldCompressBinaries_t WofShouldCompressBinaries
+        = (WofShouldCompressBinaries_t)GetProcAddress(wofLibrary, "WofShouldCompressBinaries");
+    if (WofShouldCompressBinaries == nullptr)
+    {
+        return;
+    }
+
+    WCHAR volume[4] = L"X:\\";
+    volume[0] = volumeLetter;
+    ULONG algorithm = 0;
+
+    bool compressionSuitable = (WofShouldCompressBinaries(volume, &algorithm) == TRUE);
+    if (compressionSuitable)
+    {
+        // WofSetFileDataLocation is available on Windows 8.1 and above, however, Windows 8.1 version
+        // of WofSetFileDataLocation works for WIM only, and Windows 10 is required for compression of
+        // normal files.  This isn't a problem for us, since the check for WofShouldCompressBinaries
+        // above should have already returned on Windows 8.1.
+        WofSetFileDataLocation_t WofSetFileDataLocation = 
+            (WofSetFileDataLocation_t)GetProcAddress(wofLibrary, "WofSetFileDataLocation");
+        if (WofSetFileDataLocation == nullptr)
+        {
+            return;
+        }
+
+        WOF_FILE_COMPRESSION_INFO fileInfo;
+        fileInfo.Algorithm = algorithm;
+
+        WofSetFileDataLocation(hFile, WOF_PROVIDER_FILE, &fileInfo, sizeof(WOF_FILE_COMPRESSION_INFO));
+    }
+}
+#endif
+
 HANDLE ZapImage::SaveImage(LPCWSTR wszOutputFileName, CORCOMPILE_NGEN_SIGNATURE * pNativeImageSig)
 {
     if (!IsReadyToRunCompilation())
@@ -1174,6 +1244,10 @@ HANDLE ZapImage::SaveImage(LPCWSTR wszOutputFileName, CORCOMPILE_NGEN_SIGNATURE
         PrintStats(wszOutputFileName);
 #endif
 
+#ifdef FEATURE_FUSION
+    CompressFile(wszOutputFileName[0], hFile);
+#endif
+
     return hFile;
 }
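The CompressFile call added above passes wszOutputFileName[0] as the volume letter, which presumes the output file name is a rooted path of the form X:\...; the snippet below is a defensive variant of that call, shown only to illustrate the assumption and not as part of the patch.

#ifdef FEATURE_FUSION
    // Illustrative guard only; the patch calls CompressFile unconditionally.
    if (wszOutputFileName != NULL && wszOutputFileName[0] != L'\0' && wszOutputFileName[1] == L':')
    {
        CompressFile(wszOutputFileName[0], hFile);
    }
#endif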
 
diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp
index 6cddacfc11..56835cac94 100644
--- a/src/zap/zapper.cpp
+++ b/src/zap/zapper.cpp
@@ -905,10 +905,6 @@ void Zapper::InitEE(BOOL fForceDebug, BOOL fForceProfile, BOOL fForceInstrument)
     static ConfigDWORD useRyuJitValue;
     bool fUseRyuJit = (useRyuJitValue.val(CLRConfig::INTERNAL_UseRyuJit) == 1);
 
-    // ****** TODO: Until the registry value is set by the .NET 4.6 installer, we pretend .NET 4.6 has been installed, which causes
-    // ******       RyuJit to be used by default.
-    fUseRyuJit = true;
-
     if (!fUseRyuJit)        // Do we need to fall back to JIT64 for NGEN?
     {
         LPCWSTR pwzJitName = MAKEDLLNAME_W(L"compatjit");
-- 
2.34.1