//
#include "gcpriv.h"
-#include "softwarewritewatch.h"
#define USE_INTROSORT
+// We just needed a simple random number generator for testing.
+class gc_rand
+{
+public:
+ static uint64_t x;
+
+ static uint64_t get_rand()
+ {
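+ // Linear congruential step; the AND mask keeps the state within the low 31 bits.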
+ x = (314159269*x+278281) & 0x7FFFFFFF;
+ return x;
+ }
+
+ // obtain random number in the range 0 .. r-1
+ static uint64_t get_rand(uint64_t r) {
+ // get_rand() produces a 31-bit value, so (value * r) >> 31 falls in the range [0, r).
+ uint64_t x = (uint64_t)((get_rand() * r) >> 31);
+ return x;
+ }
+};
+
+uint64_t gc_rand::x = 0;
+
#if defined(BACKGROUND_GC) && defined(FEATURE_EVENT_TRACE)
BOOL bgc_heap_walk_for_etw_p = FALSE;
#endif //BACKGROUND_GC && FEATURE_EVENT_TRACE
#endif // FEATURE_REDHAWK
#define MAX_PTR ((uint8_t*)(~(ptrdiff_t)0))
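+// Minimum amount of memory to commit at a time when growing a heap segment;
+// also used as slack space in the hard-limit retry check.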
+#define commit_min_th (16*OS_PAGE_SIZE)
#ifdef SERVER_GC
#define partial_size_th 100
#define LOH_PIN_QUEUE_LENGTH 100
#define LOH_PIN_DECAY 10
-#ifdef BIT64
-// Right now we support maximum 1024 procs - meaning that we will create at most
-// that many GC threads and GC heaps.
-#define MAX_SUPPORTED_CPUS 1024
-#else
-#define MAX_SUPPORTED_CPUS 64
-#endif // BIT64
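+// Base unit for the YieldProcessor spin loops; set at init to 32 * the number of heaps
+// (or 32 * the number of processors when there is a single heap).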
+uint32_t yp_spin_count_unit = 0;
+size_t loh_size_threshold = LARGE_OBJECT_SIZE;
#ifdef GC_CONFIG_DRIVEN
int compact_ratio = 0;
"start",
"can_allocate",
"cant_allocate",
+ "retry_allocate",
"try_fit",
"try_fit_new_seg",
- "try_fit_new_seg_after_cg",
- "try_fit_no_seg",
"try_fit_after_cg",
"try_fit_after_bgc",
"try_free_full_seg_in_bgc",
"trigger_2nd_ephemeral_gc",
"check_retry_seg"
};
+
+const char * const msl_take_state_str[] = {
+ "get_large_seg",
+ "bgc_loh_sweep",
+ "wait_bgc",
+ "block_gc",
+ "clr_mem",
+ "clr_large_mem",
+ "t_eph_gc",
+ "t_full_gc",
+ "alloc_small",
+ "alloc_large",
+ "alloc_small_cant",
+ "alloc_large_cant",
+ "try_alloc",
+ "try_budget"
+};
#endif //TRACE_GC && !DACCESS_COMPILE
+
// Keep this in sync with the definition of gc_reason
#if (defined(DT_LOG) || defined(TRACE_GC)) && !defined (DACCESS_COMPILE)
static const char* const str_gc_reasons[] =
"induced_noforce",
"gcstress",
"induced_lowmem",
- "induced_compacting"
+ "induced_compacting",
+ "lowmemory_host",
+ "pm_full_gc",
+ "lowmemory_host_blocking"
};
static const char* const str_gc_pause_modes[] =
(reason == reason_induced_noforce) ||
(reason == reason_lowmemory) ||
(reason == reason_lowmemory_blocking) ||
- (reason == reason_induced_compacting));
+ (reason == reason_induced_compacting) ||
+ (reason == reason_lowmemory_host) ||
+ (reason == reason_lowmemory_host_blocking));
}
inline
{
return ((reason == reason_induced) ||
(reason == reason_lowmemory_blocking) ||
- (reason == reason_induced_compacting));
+ (reason == reason_induced_compacting) ||
+ (reason == reason_lowmemory_host_blocking));
}
#ifndef DACCESS_COMPILE
}
#endif
-
#ifdef GC_STATS
// There is a current and a prior copy of the statistics. This allows us to display deltas per reporting
// interval, as well as running totals. The 'min' and 'max' values require special treatment. They are
if (is_induced (settings.reason))
cntReasons[(int)reason_induced]++;
-#ifdef STRESS_HEAP
else if (settings.stress_induced)
cntReasons[(int)reason_gcstress]++;
-#endif // STRESS_HEAP
else
cntReasons[(int)settings.reason]++;
#endif // GC_STATS
-#ifdef BIT64
-#define TOTAL_TIMES_TO_SHIFT 6
-#else
-#define TOTAL_TIMES_TO_SHIFT 5
-#endif // BIT64
-
+inline
size_t round_up_power2 (size_t size)
{
- unsigned short shift = 1;
- size_t shifted = 0;
-
- size--;
- for (unsigned short i = 0; i < TOTAL_TIMES_TO_SHIFT; i++)
- {
- shifted = size | (size >> shift);
- if (shifted == size)
- {
- break;
- }
-
- size = shifted;
- shift <<= 1;
- }
- shifted++;
+ // Get the 0-based index of the most-significant bit in size-1.
+ // If the call failed (because size-1 is zero), size must be 1,
+ // so return 1 (because 1 rounds up to itself).
+ DWORD highest_set_bit_index;
+ if (0 ==
+#ifdef BIT64
+ BitScanReverse64(
+#else
+ BitScanReverse(
+#endif
+ &highest_set_bit_index, size - 1)) { return 1; }
- return shifted;
+ // The size == 0 case (which would have overflowed to SIZE_MAX when decremented)
+ // is handled below by relying on the fact that highest_set_bit_index is the maximum value
+ // (31 or 63, depending on sizeof(size_t)) and left-shifting a value >= 2 by that
+ // number of bits shifts in zeros from the right, resulting in an output of zero.
+ return static_cast<size_t>(2) << highest_set_bit_index;
}
inline
size_t round_down_power2 (size_t size)
{
- size_t power2 = round_up_power2 (size);
-
- if (power2 != size)
- {
- power2 >>= 1;
- }
+ // Get the 0-based index of the most-significant bit in size.
+ // If the call failed, size must be zero so return zero.
+ DWORD highest_set_bit_index;
+ if (0 ==
+#ifdef BIT64
+ BitScanReverse64(
+#else
+ BitScanReverse(
+#endif
+ &highest_set_bit_index, size)) { return 0; }
- return power2;
+ // Left-shift 1 by highest_set_bit_index to get back a value containing only
+ // the most-significant set bit of size, i.e. size rounded down
+ // to the next power-of-two value.
+ return static_cast<size_t>(1) << highest_set_bit_index;
}
-// the index starts from 0.
-int index_of_set_bit (size_t power2)
+// Get the 0-based index of the most-significant bit in the value.
+// Returns -1 if the input value is zero (i.e. has no set bits).
+inline
+int index_of_highest_set_bit (size_t value)
{
- int low = 0;
- int high = sizeof (size_t) * 8 - 1;
- int mid;
- while (low <= high)
- {
- mid = ((low + high)/2);
- size_t temp = (size_t)1 << mid;
- if (power2 & temp)
- {
- return mid;
- }
- else if (power2 < temp)
- {
- high = mid - 1;
- }
- else
- {
- low = mid + 1;
- }
- }
-
- return -1;
+ // Get the 0-based index of the most-significant bit in the value.
+ // If the call failed (because value is zero), return -1.
+ DWORD highest_set_bit_index;
+ return (0 ==
+#ifdef BIT64
+ BitScanReverse64(
+#else
+ BitScanReverse(
+#endif
+ &highest_set_bit_index, value)) ? -1 : static_cast<int>(highest_set_bit_index);
}
inline
int relative_index_power2_plug (size_t power2)
{
- int index = index_of_set_bit (power2);
+ int index = index_of_highest_set_bit (power2);
assert (index <= MAX_INDEX_POWER2);
return ((index < MIN_INDEX_POWER2) ? 0 : (index - MIN_INDEX_POWER2));
inline
int relative_index_power2_free_space (size_t power2)
{
- int index = index_of_set_bit (power2);
+ int index = index_of_highest_set_bit (power2);
assert (index <= MAX_INDEX_POWER2);
return ((index < MIN_INDEX_POWER2) ? -1 : (index - MIN_INDEX_POWER2));
int pid_len = sprintf_s (&pBuffer[buffer_start], BUFFERSIZE - buffer_start, "[%5d]", (uint32_t)GCToOSInterface::GetCurrentThreadIdForLogging());
buffer_start += pid_len;
memset(&pBuffer[buffer_start], '-', BUFFERSIZE - buffer_start);
- int msg_len = _vsnprintf_s(&pBuffer[buffer_start], BUFFERSIZE - buffer_start, _TRUNCATE, fmt, args );
+ int msg_len = _vsnprintf_s (&pBuffer[buffer_start], BUFFERSIZE - buffer_start, _TRUNCATE, fmt, args);
if (msg_len == -1)
{
msg_len = BUFFERSIZE - buffer_start;
join_flavor_server_gc = 0,
join_flavor_bgc = 1
};
-
+
#define first_thread_arrived 2
+#pragma warning(push)
+#pragma warning(disable:4324) // don't complain if DECLSPEC_ALIGN actually pads
struct DECLSPEC_ALIGN(HS_CACHE_LINE_SIZE) join_structure
{
// Shared non volatile keep on separate line to prevent eviction
VOLATILE(int32_t) r_join_lock;
};
+#pragma warning(pop)
enum join_type
{
if (color == join_struct.lock_color.LoadWithoutBarrier())
{
respin:
- int spin_count = 4096 * (gc_heap::n_heaps - 1);
+ int spin_count = 128 * yp_spin_count_unit;
for (int j = 0; j < spin_count; j++)
{
if (color != join_struct.lock_color.LoadWithoutBarrier())
flavor, join_id, color, (int32_t)(join_struct.join_lock)));
//Thread* current_thread = GCToEEInterface::GetThread();
- //BOOL cooperative_mode = gc_heap::enable_preemptive (current_thread);
+ //BOOL cooperative_mode = gc_heap::enable_preemptive ();
uint32_t dwJoinWait = join_struct.joined_event[color].Wait(INFINITE, FALSE);
- //gc_heap::disable_preemptive (current_thread, cooperative_mode);
+ //gc_heap::disable_preemptive (cooperative_mode);
if (dwJoinWait != WAIT_OBJECT_0)
{
if (!join_struct.wait_done)
{
respin:
- int spin_count = 2 * 4096 * (gc_heap::n_heaps - 1);
+ int spin_count = 256 * yp_spin_count_unit;
for (int j = 0; j < spin_count; j++)
{
if (join_struct.wait_done)
{
if (g_num_processors > 1)
{
- YieldProcessor(); // indicate to the processor that we are spining
+ YieldProcessor(); // indicate to the processor that we are spinning
if (i & 0x01f)
GCToOSInterface::YieldThread (0);
else
if (g_num_processors > 1)
{
#ifndef MULTIPLE_HEAPS
- int spin_count = 1024 * g_num_processors;
+ int spin_count = 32 * yp_spin_count_unit;
#else //!MULTIPLE_HEAPS
- int spin_count = 32 * g_num_processors;
+ int spin_count = yp_spin_count_unit;
#endif //!MULTIPLE_HEAPS
for (int j = 0; j < spin_count; j++)
{
if (VolatileLoad(lock) < 0 || IsGCInProgress())
break;
- YieldProcessor(); // indicate to the processor that we are spining
+ YieldProcessor(); // indicate to the processor that we are spinning
}
if (VolatileLoad(lock) >= 0 && !IsGCInProgress())
{
#endif //SYNCHRONIZATION_STATS
if (g_num_processors > 1)
{
- YieldProcessor(); // indicate to the processor that we are spining
+ YieldProcessor(); // indicate to the processor that we are spinning
if (i & 0x01f)
GCToOSInterface::YieldThread (0);
else
if (g_num_processors > 1)
{
#ifndef MULTIPLE_HEAPS
- int spin_count = 1024 * g_num_processors;
+ int spin_count = 32 * yp_spin_count_unit;
#else //!MULTIPLE_HEAPS
- int spin_count = 32 * g_num_processors;
+ int spin_count = yp_spin_count_unit;
#endif //!MULTIPLE_HEAPS
for (int j = 0; j < spin_count; j++)
{
if (spin_lock->lock < 0 || gc_heap::gc_started)
break;
- YieldProcessor(); // indicate to the processor that we are spining
+ YieldProcessor(); // indicate to the processor that we are spinning
}
if (spin_lock->lock >= 0 && !gc_heap::gc_started)
{
#define CLR_SIZE ((size_t)(8*1024))
#endif //SERVER_GC
-#define END_SPACE_AFTER_GC (LARGE_OBJECT_SIZE + MAX_STRUCTALIGN)
+#define END_SPACE_AFTER_GC (loh_size_threshold + MAX_STRUCTALIGN)
#ifdef BACKGROUND_GC
#define SEGMENT_INITIAL_COMMIT (2*OS_PAGE_SIZE)
#define SEGMENT_INITIAL_COMMIT (OS_PAGE_SIZE)
#endif //BACKGROUND_GC
+// The minimum segment size when a heap hard limit is in effect. This is always a power of 2.
+const size_t min_segment_size_hard_limit = 1024*1024*16;
+
#ifdef SERVER_GC
#ifdef BIT64
// gen0
{0, 0, 40000, 0.5f, 9.0f, 20.0f, 1000, 1},
// gen1
- {163840, 0, 80000, 0.5f, 2.0f, 7.0f, 10000, 10},
+ {160*1024, 0, 80000, 0.5f, 2.0f, 7.0f, 10000, 10},
// gen2
{256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, 100000, 100},
// gen3
#endif //MULTIPLE_HEAPS
1000, 1},
// gen1
- {9*32*1024, 0, 80000, 0.5f, 2.0f, 7.0f, 10000, 10},
+ {256*1024, 0, 80000, 0.5f, 2.0f, 7.0f, 10000, 10},
// gen2
{256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, 100000, 100},
// gen3
#ifdef MULTIPLE_HEAPS
GCEvent gc_heap::gc_start_event;
-
bool gc_heap::gc_thread_no_affinitize_p = false;
+uintptr_t process_mask = 0;
int gc_heap::n_heaps;
size_t gc_heap::gc_gen0_desired_high;
+CLRCriticalSection gc_heap::check_commit_cs;
+
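+// Total bytes the GC has committed, tracked only when a heap hard limit is set;
+// the bookkeeping counter below tracks the portion committed for GC bookkeeping
+// (commits not attributed to a specific heap).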
+size_t gc_heap::current_total_committed = 0;
+
+size_t gc_heap::current_total_committed_bookkeeping = 0;
+
#ifdef SHORT_PLUGS
double gc_heap::short_plugs_pad_ratio = 0;
#endif //SHORT_PLUGS
uint32_t gc_heap::high_memory_load_th = 0;
+uint32_t gc_heap::m_high_memory_load_th;
+
+uint32_t gc_heap::v_high_memory_load_th;
+
uint64_t gc_heap::total_physical_mem = 0;
uint64_t gc_heap::entry_available_physical_mem = 0;
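+// Hard limit on the total committed bytes for the GC heap; 0 means no limit is configured.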
+size_t gc_heap::heap_hard_limit = 0;
+
#ifdef BACKGROUND_GC
GCEvent gc_heap::bgc_start_event;
BOOL gc_heap::last_gc_before_oom = FALSE;
+BOOL gc_heap::sufficient_gen0_space_p = FALSE;
+
#ifdef BACKGROUND_GC
uint8_t* gc_heap::background_saved_lowest_address = 0;
uint8_t* gc_heap::background_saved_highest_address = 0;
fgm_history gc_heap::fgm_result;
+size_t gc_heap::allocated_since_last_gc = 0;
+
BOOL gc_heap::ro_segments_in_range;
size_t gc_heap::gen0_big_free_spaces = 0;
mark* gc_heap::mark_stack_array = 0;
+#if defined (_DEBUG) && defined (VERIFY_HEAP)
BOOL gc_heap::verify_pinned_queue_p = FALSE;
+#endif // defined (_DEBUG) && defined (VERIFY_HEAP)
uint8_t* gc_heap::oldest_pinned_plug = 0;
size_t gc_heap::full_gc_counts[gc_type_max];
+bool gc_heap::maxgen_size_inc_p = false;
+
BOOL gc_heap::should_expand_in_full_gc = FALSE;
+// Provisional mode related stuff.
+bool gc_heap::provisional_mode_triggered = false;
+bool gc_heap::pm_trigger_full_gc = false;
+size_t gc_heap::provisional_triggered_gc_count = 0;
+size_t gc_heap::provisional_off_gc_count = 0;
+size_t gc_heap::num_provisional_triggered = 0;
+bool gc_heap::pm_stress_on = false;
+
#ifdef HEAP_ANALYZE
BOOL gc_heap::heap_analyze_enabled = FALSE;
#endif //HEAP_ANALYZE
size_t gc_heap::allocation_quantum = CLR_SIZE;
-GCSpinLock gc_heap::more_space_lock;
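+// The more space lock is split into separate locks for SOH and LOH allocations.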
+GCSpinLock gc_heap::more_space_lock_soh;
+GCSpinLock gc_heap::more_space_lock_loh;
+VOLATILE(int32_t) gc_heap::loh_alloc_thread_count = 0;
#ifdef SYNCHRONIZATION_STATS
unsigned int gc_heap::good_suspension = 0;
void gc_heap::fire_pevents()
{
-#ifndef CORECLR
settings.record (&gc_data_global);
gc_data_global.print();
- FIRE_EVENT(GCGlobalHeapHistory_V2, gc_data_global.final_youngest_desired,
- gc_data_global.num_heaps,
- gc_data_global.condemned_generation,
- gc_data_global.gen0_reduction_count,
- gc_data_global.reason,
- gc_data_global.global_mechanims_p,
- gc_data_global.pause_mode,
- gc_data_global.mem_pressure);
+ FIRE_EVENT(GCGlobalHeapHistory_V2,
+ gc_data_global.final_youngest_desired,
+ gc_data_global.num_heaps,
+ gc_data_global.condemned_generation,
+ gc_data_global.gen0_reduction_count,
+ gc_data_global.reason,
+ gc_data_global.global_mechanims_p,
+ gc_data_global.pause_mode,
+ gc_data_global.mem_pressure);
#ifdef MULTIPLE_HEAPS
for (int i = 0; i < gc_heap::n_heaps; i++)
gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap();
fire_per_heap_hist_event (current_gc_data_per_heap, heap_number);
#endif
-#endif //!CORECLR
}
inline BOOL
dprintf (GTC_LOG, ("h%d: plan eph size is %Id, new gen0 is %Id",
heap_number, plan_ephemeral_size, new_gen0size));
-
// If we were in no_gc_region we could have allocated a larger than normal segment,
// and the next seg we allocate will be a normal sized seg so if we can't fit the new
// ephemeral generations there, do an ephemeral promotion.
ret = ((soh_segment_size - segment_info_size) < (plan_ephemeral_size + new_gen0size));
-
break;
}
default:
{
if (gen_number == max_generation)
{
- dynamic_data* dd = dynamic_data_of (gen_number);
- size_t maxgen_allocated = (dd_desired_allocation (dd) - dd_new_allocation (dd));
- size_t maxgen_total_size = maxgen_allocated + dd_current_size (dd);
- size_t est_maxgen_surv = (size_t)((float) (maxgen_total_size) * dd_surv (dd));
- size_t est_maxgen_free = maxgen_total_size - est_maxgen_surv + dd_fragmentation (dd);
-
- dprintf (GTC_LOG, ("h%d: Total gen2 size: %Id, est gen2 dead space: %Id (s: %d, allocated: %Id), frag: %Id",
- heap_number,
- maxgen_total_size,
- est_maxgen_free,
- (int)(dd_surv (dd) * 100),
- maxgen_allocated,
- dd_fragmentation (dd)));
+ size_t est_maxgen_free = estimated_reclaim (gen_number);
uint32_t num_heaps = 1;
-
#ifdef MULTIPLE_HEAPS
num_heaps = gc_heap::n_heaps;
#endif //MULTIPLE_HEAPS
Validate(bDeep, FALSE);
}
- ADIndex GetAppDomainIndex()
- {
- return (ADIndex)RH_DEFAULT_DOMAIN_ID;
- }
-#endif //FEATURE_REDHAWK
+#endif //FEATURE_REDHAWK || BUILD_AS_STANDALONE
/////
//
// We don't want (prgmem + size) to be right at the end of the address space
// because we'd have to worry about that every time we do (address + size).
- // We also want to make sure that we leave LARGE_OBJECT_SIZE at the end
+ // We also want to make sure that we leave loh_size_threshold at the end
// so that when we allocate a small object we don't need to worry about overflow there
// when we do alloc_ptr+size.
if (prgmem)
heap_segment*
gc_heap::get_segment (size_t size, BOOL loh_p)
{
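+ // When a heap hard limit is in effect, no new segments are acquired here.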
+ if (heap_hard_limit)
+ return NULL;
+
heap_segment* result = 0;
if (segment_standby_list != 0)
size_t last_full_compact_gc_count = get_full_compact_gc_count();
//access to get_segment needs to be serialized
- add_saved_spinlock_info (me_release, mt_get_large_seg);
-
- dprintf (SPINLOCK_LOG, ("[%d]Seg: Lmsl", heap_number));
- leave_spin_lock (&more_space_lock);
+ add_saved_spinlock_info (true, me_release, mt_get_large_seg);
+ leave_spin_lock (&more_space_lock_loh);
enter_spin_lock (&gc_heap::gc_lock);
dprintf (SPINLOCK_LOG, ("[%d]Seg: Egc", heap_number));
// if a GC happened between here and before we ask for a segment in
*did_full_compact_gc = TRUE;
}
-#ifdef BACKGROUND_GC
- while (current_c_gc_state == c_gc_state_planning)
- {
- dprintf (3, ("lh state planning, waiting to get a large seg"));
-
- dprintf (SPINLOCK_LOG, ("[%d]Seg: P, Lgc", heap_number));
- leave_spin_lock (&gc_lock);
- background_gc_wait_lh (awr_get_loh_seg);
- enter_spin_lock (&gc_lock);
- dprintf (SPINLOCK_LOG, ("[%d]Seg: P, Egc", heap_number));
- }
- assert ((current_c_gc_state == c_gc_state_free) ||
- (current_c_gc_state == c_gc_state_marking));
-#endif //BACKGROUND_GC
-
heap_segment* res = get_segment_for_loh (size
#ifdef MULTIPLE_HEAPS
, this
dprintf (SPINLOCK_LOG, ("[%d]Seg: A Lgc", heap_number));
leave_spin_lock (&gc_heap::gc_lock);
- enter_spin_lock (&more_space_lock);
- dprintf (SPINLOCK_LOG, ("[%d]Seg: A Emsl", heap_number));
- add_saved_spinlock_info (me_acquire, mt_get_large_seg);
-
-#ifdef BACKGROUND_GC
- wait_for_background_planning (awr_get_loh_seg);
-#endif //BACKGROUND_GC
+ enter_spin_lock (&more_space_lock_loh);
+ add_saved_spinlock_info (true, me_acquire, mt_get_large_seg);
return res;
}
{
return (ptrdiff_t)__rdtsc();
}
-#elif defined(__clang__)
+#elif defined(__GNUC__)
static ptrdiff_t get_cycle_count()
{
ptrdiff_t cycles;
static uint16_t proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
static uint16_t heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
static uint16_t heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
- static uint16_t heap_no_to_cpu_group[MAX_SUPPORTED_CPUS];
- static uint16_t heap_no_to_group_proc[MAX_SUPPORTED_CPUS];
static uint16_t numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
static int access_time(uint8_t *sniff_buffer, int heap_number, unsigned sniff_index, unsigned n_sniff_buffers)
heap_no_to_numa_node[heap_number] = numa_node;
}
- static uint16_t find_cpu_group_from_heap_no(int heap_number)
- {
- return heap_no_to_cpu_group[heap_number];
- }
-
- static void set_cpu_group_for_heap(int heap_number, uint16_t group_number)
- {
- heap_no_to_cpu_group[heap_number] = group_number;
- }
-
- static uint16_t find_group_proc_from_heap_no(int heap_number)
- {
- return heap_no_to_group_proc[heap_number];
- }
-
- static void set_group_proc_for_heap(int heap_number, uint16_t group_proc)
- {
- heap_no_to_group_proc[heap_number] = group_proc;
- }
-
static void init_numa_node_to_heap_map(int nheaps)
{ // called right after GCHeap::Init() for each heap is finished
// when numa is not enabled, heap_no_to_numa_node[] are all filled
uint16_t heap_select::proc_no_to_heap_no[MAX_SUPPORTED_CPUS];
uint16_t heap_select::heap_no_to_proc_no[MAX_SUPPORTED_CPUS];
uint16_t heap_select::heap_no_to_numa_node[MAX_SUPPORTED_CPUS];
-uint16_t heap_select::heap_no_to_cpu_group[MAX_SUPPORTED_CPUS];
-uint16_t heap_select::heap_no_to_group_proc[MAX_SUPPORTED_CPUS];
uint16_t heap_select::numa_node_to_heap_map[MAX_SUPPORTED_CPUS+4];
BOOL gc_heap::create_thread_support (unsigned number_of_heaps)
}
}
-void set_thread_group_affinity_for_heap(int heap_number, GCThreadAffinity* affinity)
+void set_thread_affinity_for_heap(int heap_number)
{
- affinity->Group = GCThreadAffinity::None;
- affinity->Processor = GCThreadAffinity::None;
-
- uint16_t gn, gpn;
- GCToOSInterface::GetGroupForProcessor((uint16_t)heap_number, &gn, &gpn);
+ uint16_t proc_no;
+ uint16_t node_no;
- int bit_number = 0;
- for (uintptr_t mask = 1; mask !=0; mask <<=1)
+ if (GCToOSInterface::GetProcessorForHeap(heap_number, &proc_no, &node_no))
{
- if (bit_number == gpn)
+ heap_select::set_proc_no_for_heap(heap_number, proc_no);
+ if (node_no != NUMA_NODE_UNDEFINED)
{
- dprintf(3, ("using processor group %d, mask %Ix for heap %d\n", gn, mask, heap_number));
- affinity->Processor = gpn;
- affinity->Group = gn;
- heap_select::set_cpu_group_for_heap(heap_number, gn);
- heap_select::set_group_proc_for_heap(heap_number, gpn);
- if (GCToOSInterface::CanEnableGCNumaAware())
- {
- PROCESSOR_NUMBER proc_no;
- proc_no.Group = gn;
- proc_no.Number = (uint8_t)gpn;
- proc_no.Reserved = 0;
-
- uint16_t node_no = 0;
- if (GCToOSInterface::GetNumaProcessorNode(&proc_no, &node_no))
- heap_select::set_numa_node_for_heap(heap_number, node_no);
- }
- else
- { // no numa setting, each cpu group is treated as a node
- heap_select::set_numa_node_for_heap(heap_number, gn);
- }
- return;
+ heap_select::set_numa_node_for_heap(heap_number, node_no);
}
- bit_number++;
- }
-}
-
-void set_thread_affinity_mask_for_heap(int heap_number, GCThreadAffinity* affinity)
-{
- affinity->Group = GCThreadAffinity::None;
- affinity->Processor = GCThreadAffinity::None;
-
- uintptr_t pmask, smask;
- if (GCToOSInterface::GetCurrentProcessAffinityMask(&pmask, &smask))
- {
- pmask &= smask;
- int bit_number = 0;
- uint8_t proc_number = 0;
- for (uintptr_t mask = 1; mask != 0; mask <<= 1)
+ if (!GCToOSInterface::SetThreadAffinity(proc_no))
{
- if ((mask & pmask) != 0)
- {
- if (bit_number == heap_number)
- {
- dprintf (3, ("Using processor %d for heap %d", proc_number, heap_number));
- affinity->Processor = proc_number;
- heap_select::set_proc_no_for_heap(heap_number, proc_number);
- if (GCToOSInterface::CanEnableGCNumaAware())
- {
- uint16_t node_no = 0;
- PROCESSOR_NUMBER proc_no;
- proc_no.Group = 0;
- proc_no.Number = (uint8_t)proc_number;
- proc_no.Reserved = 0;
- if (GCToOSInterface::GetNumaProcessorNode(&proc_no, &node_no))
- {
- heap_select::set_numa_node_for_heap(heap_number, node_no);
- }
- }
- return;
- }
- bit_number++;
- }
- proc_number++;
+ dprintf(1, ("Failed to set thread affinity for server GC thread"));
}
}
}
assert ((heap_number == 0) || proceed_with_gc_p);
if (proceed_with_gc_p)
+ {
garbage_collect (GCHeap::GcCondemnedGeneration);
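+ // If provisional mode requested a full GC, do it now that the regular collection is done.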
+ if (pm_trigger_full_gc)
+ {
+ garbage_collect_pm_full_gc();
+ }
+ }
+
if (heap_number == 0)
{
if (proceed_with_gc_p && (!settings.concurrent))
for (int i = 0; i < gc_heap::n_heaps; i++)
{
gc_heap* hp = gc_heap::g_heaps[i];
- hp->add_saved_spinlock_info (me_release, mt_block_gc);
- dprintf (SPINLOCK_LOG, ("[%d]GC Lmsl", i));
- leave_spin_lock(&hp->more_space_lock);
+ hp->add_saved_spinlock_info (false, me_release, mt_block_gc);
+ leave_spin_lock(&hp->more_space_lock_soh);
}
#endif //MULTIPLE_HEAPS
#endif //MULTIPLE_HEAPS
-bool virtual_alloc_commit_for_heap(void* addr, size_t size, int h_number)
+bool gc_heap::virtual_alloc_commit_for_heap (void* addr, size_t size, int h_number)
{
#if defined(MULTIPLE_HEAPS) && !defined(FEATURE_REDHAWK)
// Currently there is no way for us to specific the numa node to allocate on via hosting interfaces to
// a host. This will need to be added later.
-#if !defined(FEATURE_CORECLR)
+#if !defined(FEATURE_CORECLR) && !defined(BUILD_AS_STANDALONE)
if (!CLRMemoryHosted())
#endif
{
if (GCToOSInterface::CanEnableGCNumaAware())
{
- uint32_t numa_node = heap_select::find_numa_node_from_heap_no(h_number);
+ uint16_t numa_node = heap_select::find_numa_node_from_heap_no(h_number);
if (GCToOSInterface::VirtualCommit(addr, size, numa_node))
return true;
}
return GCToOSInterface::VirtualCommit(addr, size);
}
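+// Commits memory while honoring the heap hard limit when one is set: the committed-bytes
+// accounting is charged before the actual commit and rolled back if the commit fails.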
+bool gc_heap::virtual_commit (void* address, size_t size, int h_number, bool* hard_limit_exceeded_p)
+{
+#ifndef BIT64
+ assert (heap_hard_limit == 0);
+#endif //!BIT64
+
+ if (heap_hard_limit)
+ {
+ bool exceeded_p = false;
+
+ check_commit_cs.Enter();
+
+ if ((current_total_committed + size) > heap_hard_limit)
+ {
+ dprintf (1, ("%Id + %Id = %Id > limit",
+ current_total_committed, size,
+ (current_total_committed + size),
+ heap_hard_limit));
+
+ exceeded_p = true;
+ }
+ else
+ {
+ current_total_committed += size;
+ if (h_number < 0)
+ current_total_committed_bookkeeping += size;
+ }
+
+ check_commit_cs.Leave();
+
+ if (hard_limit_exceeded_p)
+ *hard_limit_exceeded_p = exceeded_p;
+
+ if (exceeded_p)
+ {
+ dprintf (1, ("can't commit %Ix for %Id bytes > HARD LIMIT %Id", (size_t)address, size, heap_hard_limit));
+ return false;
+ }
+ }
+
+ // If it's a valid heap number, it means we're committing memory for the GC heap.
+ bool commit_succeeded_p = ((h_number >= 0) ?
+ virtual_alloc_commit_for_heap (address, size, h_number) :
+ GCToOSInterface::VirtualCommit(address, size));
+
+ if (!commit_succeeded_p && heap_hard_limit)
+ {
+ check_commit_cs.Enter();
+ dprintf (1, ("commit failed, updating %Id to %Id",
+ current_total_committed, (current_total_committed - size)));
+ current_total_committed -= size;
+ if (h_number < 0)
+ current_total_committed_bookkeeping -= size;
+
+ check_commit_cs.Leave();
+ }
+
+ return commit_succeeded_p;
+}
+
+bool gc_heap::virtual_decommit (void* address, size_t size, int h_number)
+{
+#ifndef BIT64
+ assert (heap_hard_limit == 0);
+#endif //!BIT64
+
+ bool decommit_succeeded_p = GCToOSInterface::VirtualDecommit (address, size);
+
+ if (decommit_succeeded_p && heap_hard_limit)
+ {
+ check_commit_cs.Enter();
+ current_total_committed -= size;
+ if (h_number < 0)
+ current_total_committed_bookkeeping -= size;
+ check_commit_cs.Leave();
+ }
+
+ return decommit_succeeded_p;
+}
+
#ifndef SEG_MAPPING_TABLE
inline
heap_segment* gc_heap::segment_of (uint8_t* add, ptrdiff_t& delta, BOOL verify_p)
promotion = FALSE;//TRUE;
compaction = TRUE;
#ifdef FEATURE_LOH_COMPACTION
- loh_compaction = gc_heap::should_compact_loh();
+ loh_compaction = gc_heap::loh_compaction_requested();
#else
loh_compaction = FALSE;
#endif //FEATURE_LOH_COMPACTION
void* heap;
};
-void fix_alloc_context(gc_alloc_context* acontext, void* param)
+void fix_alloc_context (gc_alloc_context* acontext, void* param)
{
fix_alloc_context_args* args = (fix_alloc_context_args*)param;
- g_theGCHeap->FixAllocContext(acontext, false, (void*)(size_t)(args->for_gc_p), args->heap);
+ g_theGCHeap->FixAllocContext(acontext, (void*)(size_t)(args->for_gc_p), args->heap);
}
-void gc_heap::fix_allocation_contexts(BOOL for_gc_p)
+void gc_heap::fix_allocation_contexts (BOOL for_gc_p)
{
fix_alloc_context_args args;
args.for_gc_p = for_gc_p;
assert ((size >= Align (min_obj_size)));
dprintf(3,("Making unused area [%Ix, %Ix[", (size_t)point, (size_t)point+size));
make_unused_array (point, size);
+ if (size >= min_free_list)
+ {
+ generation_allocator (older_gen)->thread_item_front (point, size);
+ add_gen_free (older_gen->gen_num, size);
+ generation_free_list_space (older_gen) += size;
+ }
+ else
+ {
+ generation_free_obj_space (older_gen) += size;
+ }
}
}
else
generation_allocation_limit (older_gen) =
generation_allocation_pointer (older_gen);
}
+
+ generation_allocation_pointer (older_gen) = 0;
+ generation_allocation_limit (older_gen) = 0;
}
void gc_heap::set_allocation_heap_segment (generation* gen)
void gc_heap::card_bundle_clear (size_t cardb)
{
card_bundle_table [card_bundle_word (cardb)] &= ~(1 << card_bundle_bit (cardb));
- dprintf (1,("Cleared card bundle %Ix [%Ix, %Ix[", cardb, (size_t)card_bundle_cardw (cardb),
+ dprintf (2, ("Cleared card bundle %Ix [%Ix, %Ix[", cardb, (size_t)card_bundle_cardw (cardb),
(size_t)card_bundle_cardw (cardb+1)));
}
}
#ifdef CARD_BUNDLE
-// Get the card bundle table for the specified card table.
inline
uint32_t*& card_table_card_bundle_table (uint32_t* c_table)
{
// mark array will be committed separately (per segment).
size_t commit_size = alloc_size - ms;
- if (!GCToOSInterface::VirtualCommit (mem, commit_size))
+ if (!virtual_commit (mem, commit_size))
{
- dprintf (2, ("Card table commit failed"));
+ dprintf (1, ("Card table commit failed"));
GCToOSInterface::VirtualRelease (mem, alloc_size);
return 0;
}
-
+
// initialize the ref count
uint32_t* ct = (uint32_t*)(mem+sizeof (card_table_info));
card_table_refcount (ct) = 0;
// mark array will be committed separately (per segment).
size_t commit_size = alloc_size - ms;
- if (!GCToOSInterface::VirtualCommit (mem, commit_size))
+ if (!virtual_commit (mem, commit_size))
{
dprintf (GC_TABLE_LOG, ("Table commit failed"));
set_fgm_result (fgm_commit_table, commit_size, loh_p);
saved_g_lowest_address,
saved_g_highest_address);
+ seg_mapping_table = new_seg_mapping_table;
+
// Since the runtime is already suspended, update the write barrier here as well.
// This passes a bool telling whether we need to switch to the post
// grow version of the write barrier. This test tells us if the new
#endif
}
- seg_mapping_table = new_seg_mapping_table;
-
- GCToOSInterface::FlushProcessWriteBuffers();
- g_gc_lowest_address = saved_g_lowest_address;
- g_gc_highest_address = saved_g_highest_address;
-
if (!write_barrier_updated)
{
+ seg_mapping_table = new_seg_mapping_table;
+ GCToOSInterface::FlushProcessWriteBuffers();
+ g_gc_lowest_address = saved_g_lowest_address;
+ g_gc_highest_address = saved_g_highest_address;
+
// This passes a bool telling whether we need to switch to the post
// grow version of the write barrier. This test tells us if the new
// segment was allocated at a lower address than the old, requiring
stomp_write_barrier_resize(GCToEEInterface::IsGCThread(), la != saved_g_lowest_address);
}
-
return 0;
fail:
#ifdef CARD_BUNDLE
#if defined(MARK_ARRAY) && defined(_DEBUG)
+ size_t cb_end = (size_t)((uint8_t*)card_table_card_bundle_table (ct) + size_card_bundle_of (g_gc_lowest_address, g_gc_highest_address));
#ifdef GROWABLE_SEG_MAPPING_TABLE
size_t st = size_seg_mapping_table_of (g_gc_lowest_address, g_gc_highest_address);
+ size_t cb_end_aligned = align_for_seg_mapping_table (cb_end);
+ st += (cb_end_aligned - cb_end);
#else //GROWABLE_SEG_MAPPING_TABLE
size_t st = 0;
#endif //GROWABLE_SEG_MAPPING_TABLE
}
}
- int bucket_power2 = index_of_set_bit (round_down_power2 (size));
+ int bucket_power2 = index_of_highest_set_bit (size);
if (bucket_power2 < base_power2)
{
return;
size_t plug_size_to_fit = plug_size;
- int pad_in_front = (old_loc != 0) ? USE_PADDING_FRONT : 0;
+ // best fit is only done for gen1 to gen2 and we do not pad in gen2.
+ int pad_in_front = 0;
#ifdef SHORT_PLUGS
plug_size_to_fit += (pad_in_front ? Align(min_obj_size) : 0);
#endif //SHORT_PLUGS
- int plug_power2 = index_of_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size)));
+ int plug_power2 = index_of_highest_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size)));
ptrdiff_t i;
uint8_t* new_address = 0;
(plug_size - pad),
pad,
pinned_plug (m),
- index_of_set_bit (round_down_power2 (free_space_size)),
+ index_of_highest_set_bit (free_space_size),
(pinned_plug (m) - pinned_len (m)),
- index_of_set_bit (round_down_power2 (new_free_space_size))));
+ index_of_highest_set_bit (new_free_space_size)));
#endif //SIMPLE_DPRINTF
#ifdef SHORT_PLUGS
old_loc,
new_address,
(plug_size - pad),
- index_of_set_bit (round_down_power2 (free_space_size)),
+ index_of_highest_set_bit (free_space_size),
heap_segment_plan_allocated (seg),
- index_of_set_bit (round_down_power2 (new_free_space_size))));
+ index_of_highest_set_bit (new_free_space_size)));
#endif //SIMPLE_DPRINTF
if (realign_padding_p)
new_address += pad;
}
assert ((chosen_power2 && (i == 0)) ||
- (!chosen_power2) && (i < free_space_count));
+ ((!chosen_power2) && (i < free_space_count)));
}
- int new_bucket_power2 = index_of_set_bit (round_down_power2 (new_free_space_size));
+ int new_bucket_power2 = index_of_highest_set_bit (new_free_space_size);
if (new_bucket_power2 < base_power2)
{
size_t initial_commit = SEGMENT_INITIAL_COMMIT;
//Commit the first page
- if (!virtual_alloc_commit_for_heap (new_pages, initial_commit, h_number))
+ if (!virtual_commit (new_pages, initial_commit, h_number))
{
return 0;
}
page_start += max(extra_space, 32*OS_PAGE_SIZE);
size -= max (extra_space, 32*OS_PAGE_SIZE);
- GCToOSInterface::VirtualDecommit (page_start, size);
+ virtual_decommit (page_start, size, heap_number);
dprintf (3, ("Decommitting heap segment [%Ix, %Ix[(%d)",
(size_t)page_start,
(size_t)(page_start + size),
#endif //BACKGROUND_GC
size_t size = heap_segment_committed (seg) - page_start;
- GCToOSInterface::VirtualDecommit (page_start, size);
+ virtual_decommit (page_start, size, heap_number);
//re-init the segment object
heap_segment_committed (seg) = page_start;
}
#endif //TRACE_GC || GC_CONFIG_DRIVEN
+size_t gc_heap::get_segment_size_hard_limit (uint32_t* num_heaps, bool should_adjust_num_heaps)
+{
+ assert (heap_hard_limit);
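+ // Round the hard limit up to a multiple of the minimum segment size, optionally cap the
+ // number of heaps so each heap gets at least one minimum-sized segment, then round the
+ // per-heap size up to a power of 2.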
+ size_t aligned_hard_limit = ((heap_hard_limit + min_segment_size_hard_limit - 1) & ~(min_segment_size_hard_limit - 1));
+ if (should_adjust_num_heaps)
+ {
+ uint32_t max_num_heaps = (uint32_t)(aligned_hard_limit / min_segment_size_hard_limit);
+ if (*num_heaps > max_num_heaps)
+ {
+ *num_heaps = max_num_heaps;
+ }
+ }
+
+ size_t seg_size = aligned_hard_limit / *num_heaps;
+ size_t aligned_seg_size = round_up_power2 (seg_size);
+
+ assert (g_theGCHeap->IsValidSegmentSize (aligned_seg_size));
+
+ size_t seg_size_from_config = (size_t)GCConfig::GetSegmentSize();
+ if (seg_size_from_config)
+ {
+ size_t aligned_seg_size_config = round_up_power2 (seg_size_from_config);
+
+ aligned_seg_size = max (aligned_seg_size, aligned_seg_size_config);
+ }
+
+ //printf ("limit: %Idmb, aligned: %Idmb, %d heaps, seg size from config: %Idmb, seg size %Idmb",
+ // (heap_hard_limit / 1024 / 1024),
+ // (aligned_hard_limit / 1024 / 1024),
+ // *num_heaps,
+ // (seg_size_from_config / 1024 / 1024),
+ // (aligned_seg_size / 1024 / 1024));
+ return aligned_seg_size;
+}
+
HRESULT gc_heap::initialize_gc (size_t segment_size,
size_t heap_size
#ifdef MULTIPLE_HEAPS
block_count = 1;
#endif //MULTIPLE_HEAPS
+ if (heap_hard_limit)
+ {
+ check_commit_cs.Initialize();
+ }
+
if (!reserve_initial_memory(segment_size,heap_size,block_count))
return E_OUTOFMEMORY;
gc_started = FALSE;
#ifdef MULTIPLE_HEAPS
- n_heaps = number_of_heaps;
-
g_heaps = new (nothrow) gc_heap* [number_of_heaps];
if (!g_heaps)
return E_OUTOFMEMORY;
#endif //MULTIPLE_HEAPS
+#ifdef MULTIPLE_HEAPS
+ yp_spin_count_unit = 32 * number_of_heaps;
+#else
+ yp_spin_count_unit = 32 * g_num_processors;
+#endif //MULTIPLE_HEAPS
+
+#if defined(__linux__)
+ GCToEEInterface::UpdateGCEventStatus(static_cast<int>(GCEventStatus::GetEnabledLevel(GCEventProvider_Default)),
+ static_cast<int>(GCEventStatus::GetEnabledKeywords(GCEventProvider_Default)),
+ static_cast<int>(GCEventStatus::GetEnabledLevel(GCEventProvider_Private)),
+ static_cast<int>(GCEventStatus::GetEnabledKeywords(GCEventProvider_Private)));
+#endif // __linux__
+
if (!init_semi_shared())
{
hres = E_FAIL;
loh_compaction_mode = loh_compaction_default;
#endif //FEATURE_LOH_COMPACTION
+ loh_size_threshold = (size_t)GCConfig::GetLOHThreshold();
+ assert (loh_size_threshold >= LARGE_OBJECT_SIZE);
+
#ifdef BACKGROUND_GC
memset (ephemeral_fgc_counts, 0, sizeof (ephemeral_fgc_counts));
bgc_alloc_spin_count = static_cast<uint32_t>(GCConfig::GetBGCSpinCount());
{
if (g_num_processors > 1)
{
- int spin_count = 32 * g_num_processors;
+ int spin_count = yp_spin_count_unit;
for (int j = 0; j < spin_count; j++)
{
if (gc_done_event_lock < 0)
break;
- YieldProcessor(); // indicate to the processor that we are spining
+ YieldProcessor(); // indicate to the processor that we are spinning
}
if (gc_done_event_lock >= 0)
GCToOSInterface::YieldThread(++dwSwitchCount);
VOLATILE(bool) gc_heap::internal_gc_done;
void gc_heap::add_saved_spinlock_info (
+ bool loh_p,
msl_enter_state enter_state,
msl_take_state take_state)
current->enter_state = enter_state;
current->take_state = take_state;
current->thread_id.SetToCurrentThread();
+ current->loh_p = loh_p;
+ dprintf (SPINLOCK_LOG, ("[%d]%s %s %s",
+ heap_number,
+ (loh_p ? "loh" : "soh"),
+ ((enter_state == me_acquire) ? "E" : "L"),
+ msl_take_state_str[take_state]));
spinlock_info_index++;
time_bgc_last = 0;
+ allocated_since_last_gc = 0;
+
#ifdef SPINLOCK_HISTORY
spinlock_info_index = 0;
memset (last_spinlock_info, 0, sizeof(last_spinlock_info));
mark_stack_array = 0;
+#if defined (_DEBUG) && defined (VERIFY_HEAP)
verify_pinned_queue_p = FALSE;
+#endif // _DEBUG && VERIFY_HEAP
loh_pinned_queue_tos = 0;
allocation_quantum = CLR_SIZE;
- more_space_lock = gc_lock;
+ more_space_lock_soh = gc_lock;
+
+ more_space_lock_loh = gc_lock;
ro_segments_in_range = FALSE;
new_heap_segment = NULL;
+ gen0_allocated_after_gc_p = false;
+
#ifdef RECORD_LOH_STATE
loh_state_index = 0;
#endif //RECORD_LOH_STATE
last_gc_before_oom = FALSE;
+ sufficient_gen0_space_p = FALSE;
+
#ifdef MULTIPLE_HEAPS
#ifdef HEAP_ANALYZE
}
// Grow by committing more pages
-BOOL gc_heap::grow_heap_segment (heap_segment* seg, uint8_t* high_address)
+BOOL gc_heap::grow_heap_segment (heap_segment* seg, uint8_t* high_address, bool* hard_limit_exceeded_p)
{
assert (high_address <= heap_segment_reserved (seg));
+ if (hard_limit_exceeded_p)
+ *hard_limit_exceeded_p = false;
+
//return 0 if we are at the end of the segment.
if (align_on_page (high_address) > heap_segment_reserved (seg))
return FALSE;
return TRUE;
size_t c_size = align_on_page ((size_t)(high_address - heap_segment_committed (seg)));
- c_size = max (c_size, 16*OS_PAGE_SIZE);
+ c_size = max (c_size, commit_min_th);
c_size = min (c_size, (size_t)(heap_segment_reserved (seg) - heap_segment_committed (seg)));
if (c_size == 0)
"Growing heap_segment: %Ix high address: %Ix\n",
(size_t)seg, (size_t)high_address);
- dprintf(3, ("Growing segment allocation %Ix %Ix", (size_t)heap_segment_committed(seg),c_size));
-
- if (!virtual_alloc_commit_for_heap(heap_segment_committed (seg), c_size, heap_number))
+ bool ret = virtual_commit (heap_segment_committed (seg), c_size, heap_number, hard_limit_exceeded_p);
+ if (ret)
{
- dprintf(3, ("Cannot grow heap segment"));
- return FALSE;
- }
#ifdef MARK_ARRAY
#ifndef BACKGROUND_GC
- clear_mark_array (heap_segment_committed (seg),
- heap_segment_committed (seg)+c_size, TRUE);
+ clear_mark_array (heap_segment_committed (seg),
+ heap_segment_committed (seg)+c_size, TRUE);
#endif //BACKGROUND_GC
#endif //MARK_ARRAY
- heap_segment_committed (seg) += c_size;
- STRESS_LOG1(LF_GC, LL_INFO10000, "New commit: %Ix",
- (size_t)heap_segment_committed (seg));
+ heap_segment_committed (seg) += c_size;
- assert (heap_segment_committed (seg) <= heap_segment_reserved (seg));
+ STRESS_LOG1(LF_GC, LL_INFO10000, "New commit: %Ix",
+ (size_t)heap_segment_committed (seg));
- assert (high_address <= heap_segment_committed (seg));
+ assert (heap_segment_committed (seg) <= heap_segment_reserved (seg));
+ assert (high_address <= heap_segment_committed (seg));
+ }
- return TRUE;
+ return !!ret;
}
inline
alloc_context* acontext, heap_segment* seg,
int align_const, int gen_number)
{
+ bool loh_p = (gen_number > 0);
+ GCSpinLock* msl = loh_p ? &more_space_lock_loh : &more_space_lock_soh;
+
size_t aligned_min_obj_size = Align(min_obj_size, align_const);
- //probably should pass seg==0 for free lists.
if (seg)
{
assert (heap_segment_used (seg) <= heap_segment_committed (seg));
}
+#ifdef MULTIPLE_HEAPS
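+ // Remember that gen0 has been allocated into since the last GC; allocate_small uses
+ // this to decide whether it is worth triggering another ephemeral GC when it cannot fit.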
+ if (gen_number == 0)
+ {
+ if (!gen0_allocated_after_gc_p)
+ {
+ gen0_allocated_after_gc_p = true;
+ }
+ }
+#endif //MULTIPLE_HEAPS
+
dprintf (3, ("Expanding segment allocation [%Ix, %Ix[", (size_t)start,
(size_t)start + limit_size - aligned_min_obj_size));
}
acontext->alloc_ptr = start;
}
- else
- {
- // If the next alloc context is right up against the current one it means we are absorbing the min
- // object, so need to account for that.
- acontext->alloc_bytes += (start - acontext->alloc_limit);
- }
-
+ else
+ {
+ if (gen_number == 0)
+ {
+ size_t pad_size = Align (min_obj_size, align_const);
+ make_unused_array (acontext->alloc_ptr, pad_size);
+ dprintf (3, ("contigous ac: making min obj gap %Ix->%Ix(%Id)",
+ acontext->alloc_ptr, (acontext->alloc_ptr + pad_size), pad_size));
+ acontext->alloc_ptr += pad_size;
+ }
+ }
acontext->alloc_limit = (start + limit_size - aligned_min_obj_size);
acontext->alloc_bytes += limit_size - ((gen_number < max_generation + 1) ? aligned_min_obj_size : 0);
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
- if (g_fEnableARM)
+ if (g_fEnableAppDomainMonitoring)
{
- AppDomain* alloc_appdomain = GetAppDomain();
- alloc_appdomain->RecordAllocBytes (limit_size, heap_number);
+ GCToEEInterface::RecordAllocatedBytesForHeap(limit_size, heap_number);
}
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING
if ((seg == 0) ||
(start - plug_skew + limit_size) <= heap_segment_used (seg))
{
- dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear memory(1)", heap_number));
- add_saved_spinlock_info (me_release, mt_clr_mem);
- leave_spin_lock (&more_space_lock);
+ add_saved_spinlock_info (loh_p, me_release, mt_clr_mem);
+ leave_spin_lock (msl);
dprintf (3, ("clearing memory at %Ix for %d bytes", (start - plug_skew), limit_size));
memclr (start - plug_skew, limit_size);
}
uint8_t* used = heap_segment_used (seg);
heap_segment_used (seg) = start + limit_size - plug_skew;
- dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear memory", heap_number));
- add_saved_spinlock_info (me_release, mt_clr_mem);
- leave_spin_lock (&more_space_lock);
+ add_saved_spinlock_info (loh_p, me_release, mt_clr_mem);
+ leave_spin_lock (msl);
+
if ((start - plug_skew) < used)
{
if (used != saved_used)
//verify_mem_cleared (start - plug_skew, limit_size);
}
-/* in order to make the allocator faster, allocate returns a
- * 0 filled object. Care must be taken to set the allocation limit to the
- * allocation pointer after gc
- */
+size_t gc_heap::new_allocation_limit (size_t size, size_t physical_limit, int gen_number)
+{
+ dynamic_data* dd = dynamic_data_of (gen_number);
+ ptrdiff_t new_alloc = dd_new_allocation (dd);
+ assert (new_alloc == (ptrdiff_t)Align (new_alloc,
+ get_alignment_constant (!(gen_number == (max_generation+1)))));
+
+ ptrdiff_t logical_limit = max (new_alloc, (ptrdiff_t)size);
+ size_t limit = min (logical_limit, (ptrdiff_t)physical_limit);
+ assert (limit == Align (limit, get_alignment_constant (!(gen_number == (max_generation+1)))));
+ dd_new_allocation (dd) = (new_alloc - limit);
+ return limit;
+}
-size_t gc_heap::limit_from_size (size_t size, size_t room, int gen_number,
+size_t gc_heap::limit_from_size (size_t size, size_t physical_limit, int gen_number,
int align_const)
{
- size_t new_limit = new_allocation_limit ((size + Align (min_obj_size, align_const)),
- min (room,max (size + Align (min_obj_size, align_const),
- ((gen_number < max_generation+1) ?
- allocation_quantum :
- 0))),
+ size_t padded_size = size + Align (min_obj_size, align_const);
+ // for LOH this is not true...we could select a physical_limit that's exactly the same
+ // as size.
+ assert ((gen_number != 0) || (physical_limit >= padded_size));
+ size_t min_size_to_allocate = ((gen_number == 0) ? allocation_quantum : 0);
+
+ // For SOH if the size asked for is very small, we want to allocate more than
+ // just what's asked for if possible.
+ size_t desired_size_to_allocate = max (padded_size, min_size_to_allocate);
+ size_t new_physical_limit = min (physical_limit, desired_size_to_allocate);
+
+ size_t new_limit = new_allocation_limit (padded_size,
+ new_physical_limit,
gen_number);
assert (new_limit >= (size + Align (min_obj_size, align_const)));
dprintf (100, ("requested to allocate %Id bytes, actual size is %Id", size, new_limit));
void gc_heap::handle_oom (int heap_num, oom_reason reason, size_t alloc_size,
uint8_t* allocated, uint8_t* reserved)
{
- dprintf (1, ("total committed on the heap is %Id", get_total_committed_size()));
-
UNREFERENCED_PARAMETER(heap_num);
if (reason == oom_budget)
UNREFERENCED_PARAMETER(gen_number);
uint8_t* allocated = heap_segment_allocated(seg);
- return (!a_size_fit_p (end_space_after_gc(),
- allocated,
- heap_segment_reserved (seg),
- align_const));
+ BOOL sufficient_p = sufficient_space_end_seg (allocated,
+ heap_segment_reserved (seg),
+ end_space_after_gc(),
+ tuning_deciding_short_on_seg);
+ if (!sufficient_p)
+ {
+ if (sufficient_gen0_space_p)
+ {
+ dprintf (GTC_LOG, ("gen0 has enough free space"));
+ }
+
+ sufficient_p = sufficient_gen0_space_p;
+ }
+
+ return !sufficient_p;
}
#ifdef _MSC_VER
make_unused_array (alloc_start, size);
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
- if (g_fEnableARM)
+ if (g_fEnableAppDomainMonitoring)
{
- AppDomain* alloc_appdomain = GetAppDomain();
- alloc_appdomain->RecordAllocBytes (size, heap_number);
+ GCToEEInterface::RecordAllocatedBytesForHeap(size, heap_number);
}
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING
#endif //VERIFY_HEAP
dprintf (SPINLOCK_LOG, ("[%d]Lmsl to clear large obj", heap_number));
- add_saved_spinlock_info (me_release, mt_clr_large_mem);
- leave_spin_lock (&more_space_lock);
+ add_saved_spinlock_info (true, me_release, mt_clr_large_mem);
+ leave_spin_lock (&more_space_lock_loh);
memclr (alloc_start + size_to_skip, size_to_clear);
bgc_alloc_lock->loh_alloc_set (alloc_start);
alloc_context* acontext,
int align_const)
{
-#ifdef BACKGROUND_GC
- wait_for_background_planning (awr_loh_alloc_during_plan);
-#endif //BACKGROUND_GC
-
BOOL can_fit = FALSE;
int gen_number = max_generation + 1;
generation* gen = generation_of (gen_number);
{
#ifdef BACKGROUND_GC
cookie = bgc_alloc_lock->loh_alloc_set (free_list);
+ bgc_track_loh_alloc();
#endif //BACKGROUND_GC
//unlink the free_item
{
*commit_failed_p = FALSE;
size_t limit = 0;
+ bool hard_limit_short_seg_end_p = false;
#ifdef BACKGROUND_GC
int cookie = -1;
#endif //BACKGROUND_GC
size_t pad = Align (min_obj_size, align_const);
#ifdef FEATURE_LOH_COMPACTION
+ size_t loh_pad = Align (loh_padding_obj_size, align_const);
if (gen_number == (max_generation + 1))
{
- pad += Align (loh_padding_obj_size, align_const);
+ pad += loh_pad;
}
#endif //FEATURE_LOH_COMPACTION
limit = limit_from_size (size,
(end - allocated),
gen_number, align_const);
- if (grow_heap_segment (seg, allocated + limit))
+
+ if (grow_heap_segment (seg, (allocated + limit), &hard_limit_short_seg_end_p))
{
goto found_fit;
}
else
{
- dprintf (2, ("can't grow segment, doing a full gc"));
- *commit_failed_p = TRUE;
+ if (!hard_limit_short_seg_end_p)
+ {
+ dprintf (2, ("can't grow segment, doing a full gc"));
+ *commit_failed_p = TRUE;
+ }
+ else
+ {
+ assert (heap_hard_limit);
+ }
}
}
+
goto found_no_fit;
found_fit:
if (gen_number != 0)
{
cookie = bgc_alloc_lock->loh_alloc_set (allocated);
+ bgc_track_loh_alloc();
}
#endif //BACKGROUND_GC
#ifdef FEATURE_LOH_COMPACTION
if (gen_number == (max_generation + 1))
{
- size_t loh_pad = Align (loh_padding_obj_size, align_const);
make_unused_array (old_alloc, loh_pad);
old_alloc += loh_pad;
allocated += loh_pad;
while (seg)
{
- if (a_fit_segment_end_p (gen_number, seg, (size - Align (min_obj_size, align_const)),
- acontext, align_const, commit_failed_p))
+#ifdef BACKGROUND_GC
+ if (seg->flags & heap_segment_flags_loh_delete)
{
- acontext->alloc_limit += Align (min_obj_size, align_const);
- can_allocate_p = TRUE;
- break;
+ dprintf (3, ("h%d skipping seg %Ix to be deleted", heap_number, (size_t)seg));
}
else
+#endif //BACKGROUND_GC
{
- if (*commit_failed_p)
+ if (a_fit_segment_end_p (gen_number, seg, (size - Align (min_obj_size, align_const)),
+ acontext, align_const, commit_failed_p))
{
- *oom_r = oom_cant_commit;
+ acontext->alloc_limit += Align (min_obj_size, align_const);
+ can_allocate_p = TRUE;
break;
}
- else
+
+ if (*commit_failed_p)
{
- seg = heap_segment_next_rw (seg);
+ *oom_r = oom_cant_commit;
+ break;
}
}
+
+ seg = heap_segment_next_rw (seg);
}
return can_allocate_p;
#ifdef BACKGROUND_GC
inline
-void gc_heap::wait_for_background (alloc_wait_reason awr)
+void gc_heap::wait_for_background (alloc_wait_reason awr, bool loh_p)
{
+ GCSpinLock* msl = loh_p ? &more_space_lock_loh : &more_space_lock_soh;
+
dprintf (2, ("BGC is already in progress, waiting for it to finish"));
- dprintf (SPINLOCK_LOG, ("[%d]Lmsl to wait for bgc done", heap_number));
- add_saved_spinlock_info (me_release, mt_wait_bgc);
- leave_spin_lock (&more_space_lock);
+ add_saved_spinlock_info (loh_p, me_release, mt_wait_bgc);
+ leave_spin_lock (msl);
background_gc_wait (awr);
- enter_spin_lock (&more_space_lock);
- add_saved_spinlock_info (me_acquire, mt_wait_bgc);
- dprintf (SPINLOCK_LOG, ("[%d]Emsl after waiting for bgc done", heap_number));
+ enter_spin_lock (msl);
+ add_saved_spinlock_info (loh_p, me_acquire, mt_wait_bgc);
}
-void gc_heap::wait_for_bgc_high_memory (alloc_wait_reason awr)
+void gc_heap::wait_for_bgc_high_memory (alloc_wait_reason awr, bool loh_p)
{
if (recursive_gc_sync::background_running_p())
{
uint32_t memory_load;
get_memory_info (&memory_load);
- if (memory_load >= 95)
+ if (memory_load >= m_high_memory_load_th)
{
dprintf (GTC_LOG, ("high mem - wait for BGC to finish, wait reason: %d", awr));
- wait_for_background (awr);
+ wait_for_background (awr, loh_p);
}
}
}
BOOL gc_heap::trigger_ephemeral_gc (gc_reason gr)
{
#ifdef BACKGROUND_GC
- wait_for_bgc_high_memory (awr_loh_oos_bgc);
+ wait_for_bgc_high_memory (awr_loh_oos_bgc, false);
#endif //BACKGROUND_GC
BOOL did_full_compact_gc = FALSE;
vm_heap->GarbageCollectGeneration(max_generation - 1, gr);
#ifdef MULTIPLE_HEAPS
- enter_spin_lock (&more_space_lock);
- add_saved_spinlock_info (me_acquire, mt_t_eph_gc);
- dprintf (SPINLOCK_LOG, ("[%d]Emsl after a GC", heap_number));
+ enter_spin_lock (&more_space_lock_soh);
+ add_saved_spinlock_info (false, me_acquire, mt_t_eph_gc);
#endif //MULTIPLE_HEAPS
size_t current_full_compact_gc_count = get_full_compact_gc_count();
return can_allocate;
}
-BOOL gc_heap::allocate_small (int gen_number,
- size_t size,
- alloc_context* acontext,
- int align_const)
+allocation_state gc_heap::allocate_small (int gen_number,
+ size_t size,
+ alloc_context* acontext,
+ int align_const)
{
#if defined (BACKGROUND_GC) && !defined (MULTIPLE_HEAPS)
if (recursive_gc_sync::background_running_p())
background_soh_alloc_count++;
if ((background_soh_alloc_count % bgc_alloc_spin_count) == 0)
{
- add_saved_spinlock_info (me_release, mt_alloc_small);
- dprintf (SPINLOCK_LOG, ("[%d]spin Lmsl", heap_number));
- leave_spin_lock (&more_space_lock);
- bool cooperative_mode = enable_preemptive ();
+ add_saved_spinlock_info (false, me_release, mt_alloc_small);
+ leave_spin_lock (&more_space_lock_soh);
+ bool cooperative_mode = enable_preemptive();
GCToOSInterface::Sleep (bgc_alloc_spin);
disable_preemptive (cooperative_mode);
- enter_spin_lock (&more_space_lock);
- add_saved_spinlock_info (me_acquire, mt_alloc_small);
- dprintf (SPINLOCK_LOG, ("[%d]spin Emsl", heap_number));
+ enter_spin_lock (&more_space_lock_soh);
+ add_saved_spinlock_info (false, me_acquire, mt_alloc_small);
}
else
{
while (1)
{
dprintf (3, ("[h%d]soh state is %s", heap_number, allocation_state_str[soh_alloc_state]));
+
switch (soh_alloc_state)
{
case a_state_can_allocate:
can_use_existing_p = soh_try_fit (gen_number, size, acontext,
align_const, &commit_failed_p,
&short_seg_end_p);
- if (short_seg_end_p)
+
+ if (can_use_existing_p)
+ {
+ soh_alloc_state = a_state_can_allocate;
+ }
+#ifdef MULTIPLE_HEAPS
+ else if (gen0_allocated_after_gc_p)
+ {
+ // some other threads already grabbed the more space lock and allocated
+ // so we should attempt an ephemeral GC again.
+ soh_alloc_state = a_state_trigger_ephemeral_gc;
+ }
+#endif //MULTIPLE_HEAPS
+ else if (short_seg_end_p)
{
soh_alloc_state = a_state_cant_allocate;
oom_r = oom_budget;
}
- else
+ else
{
- if (can_use_existing_p)
- {
- soh_alloc_state = a_state_can_allocate;
- }
- else
- {
-#ifdef MULTIPLE_HEAPS
- if (!commit_failed_p)
- {
- // some other threads already grabbed the more space lock and allocated
- // so we should attempt an ephemeral GC again.
- assert (heap_segment_allocated (ephemeral_heap_segment) < alloc_allocated);
- soh_alloc_state = a_state_trigger_ephemeral_gc;
- }
- else
-#endif //MULTIPLE_HEAPS
- {
- assert (commit_failed_p);
- soh_alloc_state = a_state_cant_allocate;
- oom_r = oom_cant_commit;
- }
- }
+ assert (commit_failed_p);
+ soh_alloc_state = a_state_cant_allocate;
+ oom_r = oom_cant_commit;
}
break;
}
BOOL bgc_in_progress_p = FALSE;
BOOL did_full_compacting_gc = FALSE;
- bgc_in_progress_p = check_and_wait_for_bgc (awr_gen0_oos_bgc, &did_full_compacting_gc);
+ bgc_in_progress_p = check_and_wait_for_bgc (awr_gen0_oos_bgc, &did_full_compacting_gc, false);
soh_alloc_state = (did_full_compacting_gc ?
a_state_try_fit_after_cg :
a_state_try_fit_after_bgc);
bgc_in_progress_p = recursive_gc_sync::background_running_p();
#endif //BACKGROUND_GC
- if (short_seg_end_p)
+ if (can_use_existing_p)
{
- soh_alloc_state = (bgc_in_progress_p ?
- a_state_check_and_wait_for_bgc :
- a_state_trigger_full_compact_gc);
-
- if (fgn_maxgen_percent)
- {
- dprintf (2, ("FGN: doing last GC before we throw OOM"));
- send_full_gc_notification (max_generation, FALSE);
- }
+ soh_alloc_state = a_state_can_allocate;
}
else
{
- if (can_use_existing_p)
- {
- soh_alloc_state = a_state_can_allocate;
- }
- else
+ if (short_seg_end_p)
{
-#ifdef MULTIPLE_HEAPS
- if (!commit_failed_p)
+ if (should_expand_in_full_gc)
{
- // some other threads already grabbed the more space lock and allocated
- // so we should attempt an ephemeral GC again.
- assert (heap_segment_allocated (ephemeral_heap_segment) < alloc_allocated);
- soh_alloc_state = a_state_trigger_ephemeral_gc;
+ dprintf (2, ("gen1 GC wanted to expand!"));
+ soh_alloc_state = a_state_trigger_full_compact_gc;
}
else
-#endif //MULTIPLE_HEAPS
{
- soh_alloc_state = a_state_trigger_full_compact_gc;
- if (fgn_maxgen_percent)
- {
- dprintf (2, ("FGN: failed to commit, doing full compacting GC"));
- send_full_gc_notification (max_generation, FALSE);
- }
+ soh_alloc_state = (bgc_in_progress_p ?
+ a_state_check_and_wait_for_bgc :
+ a_state_trigger_full_compact_gc);
}
}
+ else if (commit_failed_p)
+ {
+ soh_alloc_state = a_state_trigger_full_compact_gc;
+ }
+ else
+ {
+#ifdef MULTIPLE_HEAPS
+ // some other threads already grabbed the more space lock and allocated
+ // so we should attempt an ephemeral GC again.
+ assert (gen0_allocated_after_gc_p);
+ soh_alloc_state = a_state_trigger_ephemeral_gc;
+#else //MULTIPLE_HEAPS
+ assert (!"shouldn't get here");
+#endif //MULTIPLE_HEAPS
+ }
}
}
break;
}
case a_state_trigger_full_compact_gc:
{
+ if (fgn_maxgen_percent)
+ {
+ dprintf (2, ("FGN: SOH doing last GC before we throw OOM"));
+ send_full_gc_notification (max_generation, FALSE);
+ }
+
BOOL got_full_compacting_gc = FALSE;
- got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r);
+ got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r, false);
soh_alloc_state = (got_full_compacting_gc ? a_state_try_fit_after_cg : a_state_cant_allocate);
break;
}
heap_segment_allocated (ephemeral_heap_segment),
heap_segment_reserved (ephemeral_heap_segment));
- dprintf (SPINLOCK_LOG, ("[%d]Lmsl for oom", heap_number));
- add_saved_spinlock_info (me_release, mt_alloc_small_cant);
- leave_spin_lock (&more_space_lock);
+ add_saved_spinlock_info (false, me_release, mt_alloc_small_cant);
+ leave_spin_lock (&more_space_lock_soh);
}
- return (soh_alloc_state == a_state_can_allocate);
+ assert ((soh_alloc_state == a_state_can_allocate) ||
+ (soh_alloc_state == a_state_cant_allocate) ||
+ (soh_alloc_state == a_state_retry_allocate));
+
+ return soh_alloc_state;
}
#ifdef BACKGROUND_GC
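+// Track the number of threads currently allocating on the LOH while a background GC
+// is in its planning phase.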
inline
-void gc_heap::wait_for_background_planning (alloc_wait_reason awr)
+void gc_heap::bgc_track_loh_alloc()
{
- while (current_c_gc_state == c_gc_state_planning)
+ if (current_c_gc_state == c_gc_state_planning)
{
- dprintf (3, ("lh state planning, cannot allocate"));
+ Interlocked::Increment (&loh_alloc_thread_count);
+ dprintf (3, ("h%d: inc lc: %d", heap_number, (int32_t)loh_alloc_thread_count));
+ }
+}
- dprintf (SPINLOCK_LOG, ("[%d]Lmsl to wait for bgc plan", heap_number));
- add_saved_spinlock_info (me_release, mt_wait_bgc_plan);
- leave_spin_lock (&more_space_lock);
- background_gc_wait_lh (awr);
- enter_spin_lock (&more_space_lock);
- add_saved_spinlock_info (me_acquire, mt_wait_bgc_plan);
- dprintf (SPINLOCK_LOG, ("[%d]Emsl after waiting for bgc plan", heap_number));
+inline
+void gc_heap::bgc_untrack_loh_alloc()
+{
+ if (current_c_gc_state == c_gc_state_planning)
+ {
+ Interlocked::Decrement (&loh_alloc_thread_count);
+ dprintf (3, ("h%d: dec lc: %d", heap_number, (int32_t)loh_alloc_thread_count));
}
- assert ((current_c_gc_state == c_gc_state_free) ||
- (current_c_gc_state == c_gc_state_marking));
}
BOOL gc_heap::bgc_loh_should_allocate()
{
- size_t min_gc_size = dd_min_size(dynamic_data_of (max_generation + 1));
+ size_t min_gc_size = dd_min_size (dynamic_data_of (max_generation + 1));
if ((bgc_begin_loh_size + bgc_loh_size_increased) < (min_gc_size * 10))
{
return (new_seg != 0);
}
+// PERF TODO: this is too aggressive; also, under a hard limit we should
+// count the actual allocated bytes instead of only updating the count
+// when we get a new seg.
BOOL gc_heap::retry_full_compact_gc (size_t size)
{
size_t seg_size = get_large_seg_size (size);
}
BOOL gc_heap::check_and_wait_for_bgc (alloc_wait_reason awr,
- BOOL* did_full_compact_gc)
+ BOOL* did_full_compact_gc,
+ bool loh_p)
{
BOOL bgc_in_progress = FALSE;
*did_full_compact_gc = FALSE;
{
bgc_in_progress = TRUE;
size_t last_full_compact_gc_count = get_full_compact_gc_count();
- wait_for_background (awr);
+ wait_for_background (awr, loh_p);
size_t current_full_compact_gc_count = get_full_compact_gc_count();
if (current_full_compact_gc_count > last_full_compact_gc_count)
{
}
BOOL gc_heap::trigger_full_compact_gc (gc_reason gr,
- oom_reason* oom_r)
+ oom_reason* oom_r,
+ bool loh_p)
{
BOOL did_full_compact_gc = FALSE;
#ifdef BACKGROUND_GC
if (recursive_gc_sync::background_running_p())
{
- wait_for_background ((gr == reason_oos_soh) ? awr_gen0_oos_bgc : awr_loh_oos_bgc);
+ wait_for_background (((gr == reason_oos_soh) ? awr_gen0_oos_bgc : awr_loh_oos_bgc), loh_p);
dprintf (2, ("waited for BGC - done"));
}
#endif //BACKGROUND_GC
+ GCSpinLock* msl = loh_p ? &more_space_lock_loh : &more_space_lock_soh;
size_t current_full_compact_gc_count = get_full_compact_gc_count();
if (current_full_compact_gc_count > last_full_compact_gc_count)
{
}
dprintf (3, ("h%d full GC", heap_number));
- vm_heap->GarbageCollectGeneration(max_generation, gr);
-#ifdef MULTIPLE_HEAPS
- enter_spin_lock (&more_space_lock);
- dprintf (SPINLOCK_LOG, ("[%d]Emsl after full gc", heap_number));
- add_saved_spinlock_info (me_acquire, mt_t_full_gc);
-#endif //MULTIPLE_HEAPS
+ trigger_gc_for_alloc (max_generation, gr, msl, loh_p, mt_t_full_gc);
current_full_compact_gc_count = get_full_compact_gc_count();
}
#endif //RECORD_LOH_STATE
-BOOL gc_heap::allocate_large (int gen_number,
- size_t size,
- alloc_context* acontext,
- int align_const)
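+// Under a hard limit, decide whether it is worth retrying this allocation on another heap:
+// only if committing this much more would still leave some slack below the limit.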
+bool gc_heap::should_retry_other_heap (size_t size)
+{
+#ifdef MULTIPLE_HEAPS
+ if (heap_hard_limit)
+ {
+ size_t total_heap_committed_recorded =
+ current_total_committed - current_total_committed_bookkeeping;
+ size_t min_size = dd_min_size (g_heaps[0]->dynamic_data_of (max_generation + 1));
+ size_t slack_space = max (commit_min_th, min_size);
+ bool retry_p = ((total_heap_committed_recorded + size) < (heap_hard_limit - slack_space));
+ dprintf (1, ("%Id - %Id - total committed %Id - size %Id = %Id, %s",
+ heap_hard_limit, slack_space, total_heap_committed_recorded, size,
+ (heap_hard_limit - slack_space - total_heap_committed_recorded - size),
+ (retry_p ? "retry" : "no retry")));
+ return retry_p;
+ }
+ else
+#endif //MULTIPLE_HEAPS
+ {
+ return false;
+ }
+}
+
+allocation_state gc_heap::allocate_large (int gen_number,
+ size_t size,
+ alloc_context* acontext,
+ int align_const)
{
#ifdef BACKGROUND_GC
- if (recursive_gc_sync::background_running_p() && (current_c_gc_state != c_gc_state_planning))
+ if (recursive_gc_sync::background_running_p())
{
background_loh_alloc_count++;
//if ((background_loh_alloc_count % bgc_alloc_spin_count_loh) == 0)
{
if (!bgc_alloc_spin_loh)
{
- add_saved_spinlock_info (me_release, mt_alloc_large);
- dprintf (SPINLOCK_LOG, ("[%d]spin Lmsl loh", heap_number));
- leave_spin_lock (&more_space_lock);
- bool cooperative_mode = enable_preemptive ();
+ add_saved_spinlock_info (true, me_release, mt_alloc_large);
+ leave_spin_lock (&more_space_lock_loh);
+ bool cooperative_mode = enable_preemptive();
GCToOSInterface::YieldThread (bgc_alloc_spin_loh);
disable_preemptive (cooperative_mode);
- enter_spin_lock (&more_space_lock);
- add_saved_spinlock_info (me_acquire, mt_alloc_large);
+ enter_spin_lock (&more_space_lock_loh);
+ add_saved_spinlock_info (true, me_acquire, mt_alloc_large);
dprintf (SPINLOCK_LOG, ("[%d]spin Emsl loh", heap_number));
}
}
else
{
- wait_for_background (awr_loh_alloc_during_bgc);
+ wait_for_background (awr_loh_alloc_during_bgc, true);
}
}
}
assert ((loh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0));
break;
}
- case a_state_try_fit_new_seg_after_cg:
- {
- BOOL commit_failed_p = FALSE;
- BOOL can_use_existing_p = FALSE;
-
- can_use_existing_p = loh_try_fit (gen_number, size, acontext,
- align_const, &commit_failed_p, &oom_r);
- // Even after we got a new seg it doesn't necessarily mean we can allocate,
- // another LOH allocating thread could have beat us to acquire the msl so
- // we need to try again. However, if we failed to commit, which means we
- // did have space on the seg, we bail right away 'cause we already did a
- // full compacting GC.
- loh_alloc_state = (can_use_existing_p ?
- a_state_can_allocate :
- (commit_failed_p ?
- a_state_cant_allocate :
- a_state_acquire_seg_after_cg));
- assert ((loh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0));
- break;
- }
- case a_state_try_fit_no_seg:
- {
- BOOL commit_failed_p = FALSE;
- BOOL can_use_existing_p = FALSE;
-
- can_use_existing_p = loh_try_fit (gen_number, size, acontext,
- align_const, &commit_failed_p, &oom_r);
- loh_alloc_state = (can_use_existing_p ? a_state_can_allocate : a_state_cant_allocate);
- assert ((loh_alloc_state == a_state_can_allocate) == (acontext->alloc_ptr != 0));
- assert ((loh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure));
- break;
- }
case a_state_try_fit_after_cg:
{
BOOL commit_failed_p = FALSE;
can_use_existing_p = loh_try_fit (gen_number, size, acontext,
align_const, &commit_failed_p, &oom_r);
+ // If we failed to commit, we bail right away 'cause we already did a
+ // full compacting GC.
loh_alloc_state = (can_use_existing_p ?
a_state_can_allocate :
(commit_failed_p ?
// threads could have allocated a bunch of segments before us so
// we might need to retry.
loh_alloc_state = (can_get_new_seg_p ?
- a_state_try_fit_new_seg_after_cg :
+ a_state_try_fit_after_cg :
a_state_check_retry_seg);
break;
}
BOOL bgc_in_progress_p = FALSE;
BOOL did_full_compacting_gc = FALSE;
- if (fgn_maxgen_percent)
- {
- dprintf (2, ("FGN: failed to acquire seg, may need to do a full blocking GC"));
- send_full_gc_notification (max_generation, FALSE);
- }
-
- bgc_in_progress_p = check_and_wait_for_bgc (awr_loh_oos_bgc, &did_full_compacting_gc);
+ bgc_in_progress_p = check_and_wait_for_bgc (awr_loh_oos_bgc, &did_full_compacting_gc, true);
loh_alloc_state = (!bgc_in_progress_p ?
a_state_trigger_full_compact_gc :
(did_full_compacting_gc ?
}
case a_state_trigger_full_compact_gc:
{
+ if (fgn_maxgen_percent)
+ {
+ dprintf (2, ("FGN: LOH doing last GC before we throw OOM"));
+ send_full_gc_notification (max_generation, FALSE);
+ }
+
BOOL got_full_compacting_gc = FALSE;
- got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r);
+ got_full_compacting_gc = trigger_full_compact_gc (gr, &oom_r, true);
loh_alloc_state = (got_full_compacting_gc ? a_state_try_fit_after_cg : a_state_cant_allocate);
assert ((loh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure));
break;
{
size_t last_full_compact_gc_count = current_full_compact_gc_count;
current_full_compact_gc_count = get_full_compact_gc_count();
-
- if (current_full_compact_gc_count > (last_full_compact_gc_count + 1))
+ if (current_full_compact_gc_count > last_full_compact_gc_count)
{
should_retry_get_seg = TRUE;
}
loh_alloc_state = (should_retry_gc ?
a_state_trigger_full_compact_gc :
(should_retry_get_seg ?
- a_state_acquire_seg_after_cg :
+ a_state_try_fit_after_cg :
a_state_cant_allocate));
assert ((loh_alloc_state != a_state_cant_allocate) || (oom_r != oom_no_failure));
break;
if (loh_alloc_state == a_state_cant_allocate)
{
assert (oom_r != oom_no_failure);
- handle_oom (heap_number,
- oom_r,
- size,
- 0,
- 0);
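+ // Before declaring OOM, see if another heap might still be able to satisfy this request under the hard limit.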
+ if (should_retry_other_heap (size))
+ {
+ loh_alloc_state = a_state_retry_allocate;
+ }
+ else
+ {
+ handle_oom (heap_number,
+ oom_r,
+ size,
+ 0,
+ 0);
+ }
+ add_saved_spinlock_info (true, me_release, mt_alloc_large_cant);
+ leave_spin_lock (&more_space_lock_loh);
+ }
+
+ assert ((loh_alloc_state == a_state_can_allocate) ||
+ (loh_alloc_state == a_state_cant_allocate) ||
+ (loh_alloc_state == a_state_retry_allocate));
+ return loh_alloc_state;
+}
+
+// BGC's final mark phase will acquire the LOH msl, so for LOH allocations we release it
+// here before triggering the GC and re-acquire it afterwards.
+void gc_heap::trigger_gc_for_alloc (int gen_number, gc_reason gr,
+ GCSpinLock* msl, bool loh_p,
+ msl_take_state take_state)
+{
+#ifdef BACKGROUND_GC
+ if (loh_p)
+ {
+ add_saved_spinlock_info (loh_p, me_release, take_state);
+ leave_spin_lock (msl);
+ }
+#endif //BACKGROUND_GC
- add_saved_spinlock_info (me_release, mt_alloc_large_cant);
- dprintf (SPINLOCK_LOG, ("[%d]Lmsl for loh oom", heap_number));
- leave_spin_lock (&more_space_lock);
+ vm_heap->GarbageCollectGeneration (gen_number, gr);
+
+#ifdef MULTIPLE_HEAPS
+ if (!loh_p)
+ {
+ enter_spin_lock (msl);
+ add_saved_spinlock_info (loh_p, me_acquire, take_state);
}
+#endif //MULTIPLE_HEAPS
- return (loh_alloc_state == a_state_can_allocate);
+#ifdef BACKGROUND_GC
+ if (loh_p)
+ {
+ enter_spin_lock (msl);
+ add_saved_spinlock_info (loh_p, me_acquire, take_state);
+ }
+#endif //BACKGROUND_GC
}
-int gc_heap::try_allocate_more_space (alloc_context* acontext, size_t size,
+allocation_state gc_heap::try_allocate_more_space (alloc_context* acontext, size_t size,
int gen_number)
{
if (gc_heap::gc_started)
{
wait_for_gc_done();
- return -1;
+ return a_state_retry_allocate;
}
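+ // SOH and LOH now use separate more space locks; pick the right one for this allocation.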
+ bool loh_p = (gen_number > 0);
+ GCSpinLock* msl = loh_p ? &more_space_lock_loh : &more_space_lock_soh;
+
#ifdef SYNCHRONIZATION_STATS
int64_t msl_acquire_start = GCToOSInterface::QueryPerformanceCounter();
#endif //SYNCHRONIZATION_STATS
- enter_spin_lock (&more_space_lock);
- add_saved_spinlock_info (me_acquire, mt_try_alloc);
+ enter_spin_lock (msl);
+ add_saved_spinlock_info (loh_p, me_acquire, mt_try_alloc);
dprintf (SPINLOCK_LOG, ("[%d]Emsl for alloc", heap_number));
#ifdef SYNCHRONIZATION_STATS
int64_t msl_acquire = GCToOSInterface::QueryPerformanceCounter() - msl_acquire_start;
}
#ifdef BACKGROUND_GC
- wait_for_bgc_high_memory (awr_gen0_alloc);
+ wait_for_bgc_high_memory (awr_gen0_alloc, loh_p);
#endif //BACKGROUND_GC
#ifdef SYNCHRONIZATION_STATS
if (!settings.concurrent || (gen_number == 0))
{
- vm_heap->GarbageCollectGeneration (0, ((gen_number == 0) ? reason_alloc_soh : reason_alloc_loh));
-#ifdef MULTIPLE_HEAPS
- enter_spin_lock (&more_space_lock);
- add_saved_spinlock_info (me_acquire, mt_try_budget);
- dprintf (SPINLOCK_LOG, ("[%d]Emsl out budget", heap_number));
-#endif //MULTIPLE_HEAPS
+ trigger_gc_for_alloc (0, ((gen_number == 0) ? reason_alloc_soh : reason_alloc_loh),
+ msl, loh_p, mt_try_budget);
}
}
- BOOL can_allocate = ((gen_number == 0) ?
+ allocation_state can_allocate = ((gen_number == 0) ?
allocate_small (gen_number, size, acontext, align_const) :
allocate_large (gen_number, size, acontext, align_const));
- if (can_allocate)
+ if (can_allocate == a_state_can_allocate)
{
size_t alloc_context_bytes = acontext->alloc_limit + Align (min_obj_size, align_const) - acontext->alloc_ptr;
int etw_allocation_index = ((gen_number == 0) ? 0 : 1);
etw_allocation_running_amount[etw_allocation_index] += alloc_context_bytes;
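+ // Also track total bytes handed out since the last GC (reported via get_total_allocated_since_last_gc).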
+ allocated_since_last_gc += alloc_context_bytes;
if (etw_allocation_running_amount[etw_allocation_index] > etw_allocation_tick)
{
}
}
- return (int)can_allocate;
+ return can_allocate;
}
#ifdef MULTIPLE_HEAPS
void gc_heap::balance_heaps (alloc_context* acontext)
{
-
if (acontext->alloc_count < 4)
{
if (acontext->alloc_count == 0)
org_hp->alloc_context_count--;
max_hp->alloc_context_count++;
acontext->set_alloc_heap(GCHeap::GetHeap(max_hp->heap_number));
- if (GCToOSInterface::CanEnableGCCPUGroups())
- { //only set ideal processor when max_hp and org_hp are in the same cpu
- //group. DO NOT MOVE THREADS ACROSS CPU GROUPS
- uint16_t org_gn = heap_select::find_cpu_group_from_heap_no(org_hp->heap_number);
- uint16_t max_gn = heap_select::find_cpu_group_from_heap_no(max_hp->heap_number);
- if (org_gn == max_gn) //only set within CPU group, so SetThreadIdealProcessor is enough
- {
- uint16_t group_proc_no = heap_select::find_group_proc_from_heap_no(max_hp->heap_number);
-
- GCThreadAffinity affinity;
- affinity.Processor = group_proc_no;
- affinity.Group = org_gn;
- if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity))
- {
- dprintf (3, ("Failed to set the ideal processor and group for heap %d.",
- org_hp->heap_number));
- }
- }
- }
- else
+ if (!gc_thread_no_affinitize_p)
{
- uint16_t proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number);
-
- GCThreadAffinity affinity;
- affinity.Processor = proc_no;
- affinity.Group = GCThreadAffinity::None;
+ uint16_t src_proc_no = heap_select::find_proc_no_from_heap_no(org_hp->heap_number);
+ uint16_t dst_proc_no = heap_select::find_proc_no_from_heap_no(max_hp->heap_number);
- if (!GCToOSInterface::SetCurrentThreadIdealAffinity(&affinity))
+ if (!GCToOSInterface::SetCurrentThreadIdealAffinity(src_proc_no, dst_proc_no))
{
dprintf (3, ("Failed to set the ideal processor for heap %d.",
org_hp->heap_number));
}
dprintf (3, ("Switching context %p (home heap %d) ",
acontext,
- acontext->get_home_heap()->pGenGCHeap->heap_number));
+ acontext->get_home_heap()->pGenGCHeap->heap_number));
dprintf (3, (" from heap %d (%Id free bytes, %d contexts) ",
org_hp->heap_number,
org_size,
acontext->alloc_count++;
}
-gc_heap* gc_heap::balance_heaps_loh (alloc_context* acontext, size_t /*size*/)
+gc_heap* gc_heap::balance_heaps_loh (alloc_context* acontext, size_t alloc_size)
{
gc_heap* org_hp = acontext->get_alloc_heap()->pGenGCHeap;
- //dprintf (1, ("LA: %Id", size));
+ dprintf (3, ("[h%d] LA: %Id", org_hp->heap_number, alloc_size));
//if (size > 128*1024)
if (1)
BOOL gc_heap::allocate_more_space(alloc_context* acontext, size_t size,
int alloc_generation_number)
{
- int status;
+ allocation_state status;
do
{
#ifdef MULTIPLE_HEAPS
{
gc_heap* alloc_heap = balance_heaps_loh (acontext, size);
status = alloc_heap->try_allocate_more_space (acontext, size, alloc_generation_number);
+ if (status == a_state_retry_allocate)
+ {
+ dprintf (3, ("LOH h%d alloc retry!", alloc_heap->heap_number));
+ }
}
#else
status = try_allocate_more_space (acontext, size, alloc_generation_number);
#endif //MULTIPLE_HEAPS
}
- while (status == -1);
+ while (status == a_state_retry_allocate);
- return (status != 0);
+ return (status == a_state_can_allocate);
}
inline
}
}
-inline
-CObjectHeader* gc_heap::try_fast_alloc (size_t jsize)
-{
- size_t size = Align (jsize);
- assert (size >= Align (min_obj_size));
- generation* gen = generation_of (0);
- uint8_t* result = generation_allocation_pointer (gen);
- generation_allocation_pointer (gen) += size;
- if (generation_allocation_pointer (gen) <=
- generation_allocation_limit (gen))
- {
- return (CObjectHeader*)result;
- }
- else
- {
- generation_allocation_pointer (gen) -= size;
- return 0;
- }
-}
void gc_heap::leave_allocation_segment (generation* gen)
{
adjust_limit (0, 0, gen, max_generation);
allocator* gen_allocator = generation_allocator (gen);
BOOL discard_p = gen_allocator->discard_if_no_fit_p ();
- int pad_in_front = (old_loc != 0)? USE_PADDING_FRONT : 0;
+ int pad_in_front = ((old_loc != 0) && ((from_gen_number+1) != max_generation)) ? USE_PADDING_FRONT : 0;
size_t real_size = size + Align (min_obj_size);
if (pad_in_front)
remove_gen_free (gen->gen_num, free_list_size);
adjust_limit (free_list, free_list_size, gen, from_gen_number+1);
+ generation_allocate_end_seg_p (gen) = FALSE;
goto finished;
}
// We do first fit on bucket 0 because we are not guaranteed to find a fit there.
sz_list = sz_list * 2;
}
//go back to the beginning of the segment list
- generation_allocate_end_seg_p (gen) = TRUE;
heap_segment* seg = heap_segment_rw (generation_start_segment (gen));
if (seg != generation_allocation_segment (gen))
{
heap_segment_committed (seg) -
heap_segment_plan_allocated (seg),
gen, from_gen_number+1);
+ generation_allocate_end_seg_p (gen) = TRUE;
// dformat (t, 3, "Expanding segment allocation");
heap_segment_plan_allocated (seg) =
heap_segment_committed (seg);
heap_segment_committed (seg) -
heap_segment_plan_allocated (seg),
gen, from_gen_number+1);
+ generation_allocate_end_seg_p (gen) = TRUE;
heap_segment_plan_allocated (seg) =
heap_segment_committed (seg);
size = Align (size);
assert (size >= Align (min_obj_size));
- int pad_in_front = (old_loc != 0) ? USE_PADDING_FRONT : 0;
+ int pad_in_front = ((old_loc != 0) && (active_new_gen_number != max_generation)) ? USE_PADDING_FRONT : 0;
if (consider_bestfit && use_bestfit)
{
to_gen_number = from_gen_number + (settings.promotion ? 1 : 0);
}
- dprintf (3, ("aic gen%d: s: %Id, %d->%d, %Ix->%Ix", gen->gen_num, size, from_gen_number,
- to_gen_number, generation_allocation_pointer(gen), generation_allocation_limit(gen)));
-
- int pad_in_front = (old_loc != 0) ? USE_PADDING_FRONT : 0;
+ dprintf (3, ("aic gen%d: s: %Id", gen->gen_num, size));
+ int pad_in_front = ((old_loc != 0) && (to_gen_number != max_generation)) ? USE_PADDING_FRONT : 0;
+
if ((from_gen_number != -1) && (from_gen_number != (int)max_generation) && settings.promotion)
{
generation_condemned_allocated (generation_of (from_gen_number + (settings.promotion ? 1 : 0))) += size;
}
int gc_heap::joined_generation_to_condemn (BOOL should_evaluate_elevation,
- int n_initial,
+ int initial_gen,
+ int current_gen,
BOOL* blocking_collection_p
STRESS_HEAP_ARG(int n_original))
{
- int n = n_initial;
+ int n = current_gen;
#ifdef MULTIPLE_HEAPS
- BOOL blocking_p = *blocking_collection_p;
- if (!blocking_p)
+ BOOL joined_last_gc_before_oom = FALSE;
+ for (int i = 0; i < n_heaps; i++)
{
- for (int i = 0; i < n_heaps; i++)
+ if (g_heaps[i]->last_gc_before_oom)
{
- if (g_heaps[i]->last_gc_before_oom)
- {
- dprintf (GTC_LOG, ("h%d is setting blocking to TRUE", i));
- *blocking_collection_p = TRUE;
- break;
- }
+ dprintf (GTC_LOG, ("h%d is setting blocking to TRUE", i));
+ joined_last_gc_before_oom = TRUE;
+ break;
}
}
+#else
+ BOOL joined_last_gc_before_oom = last_gc_before_oom;
#endif //MULTIPLE_HEAPS
+ if (joined_last_gc_before_oom && settings.pause_mode != pause_low_latency)
+ {
+ assert (*blocking_collection_p);
+ }
+
if (should_evaluate_elevation && (n == max_generation))
{
dprintf (GTC_LOG, ("lock: %d(%d)",
settings.elevation_locked_count = 0;
}
+ if (provisional_mode_triggered && (n == max_generation))
+ {
+ // There are a few cases where we should not reduce the generation.
+ if ((initial_gen == max_generation) || (settings.reason == reason_alloc_loh))
+ {
+ // If we are doing a full GC in the provisional mode, we always
+ // make it blocking because we don't want to get into a situation
+ // where foreground GCs are asking for a compacting full GC right away
+ // and not getting it.
+ dprintf (GTC_LOG, ("full GC induced, not reducing gen"));
+ *blocking_collection_p = TRUE;
+ }
+ else if (should_expand_in_full_gc || joined_last_gc_before_oom)
+ {
+ dprintf (GTC_LOG, ("need full blocking GCs to expand heap or avoid OOM, not reducing gen"));
+ assert (*blocking_collection_p);
+ }
+ else
+ {
+ dprintf (GTC_LOG, ("reducing gen in PM: %d->%d->%d", initial_gen, n, (max_generation - 1)));
+ n = max_generation - 1;
+ }
+ }
+
+ if (should_expand_in_full_gc)
+ {
+ should_expand_in_full_gc = FALSE;
+ }
+
+ if (heap_hard_limit)
+ {
+ // If we have already consumed 90% of the limit, we should check to see if we should compact LOH.
+ // TODO: should unify this with gen2.
+ dprintf (GTC_LOG, ("committed %Id is %d%% of limit %Id",
+ current_total_committed, (int)((float)current_total_committed * 100.0 / (float)heap_hard_limit),
+ heap_hard_limit));
+ if ((current_total_committed * 10) >= (heap_hard_limit * 9))
+ {
+ bool full_compact_gc_p = false;
+
+ size_t loh_frag = get_total_gen_fragmentation (max_generation + 1);
+
+ // If the LOH frag is >= 1/8 it's worth compacting it
+ if ((loh_frag * 8) >= heap_hard_limit)
+ {
+ dprintf (GTC_LOG, ("loh frag: %Id > 1/8 of limit %Id", loh_frag, (heap_hard_limit / 8)));
+ full_compact_gc_p = true;
+ }
+ else
+ {
+ // If there's not much fragmentation but it looks like it'll be productive to
+ // collect LOH, do that.
+ size_t est_loh_reclaim = get_total_gen_estimated_reclaim (max_generation + 1);
+ full_compact_gc_p = ((est_loh_reclaim * 8) >= heap_hard_limit);
+ dprintf (GTC_LOG, ("loh est reclaim: %Id, 1/8 of limit %Id", est_loh_reclaim, (heap_hard_limit / 8)));
+ }
+
+ if (full_compact_gc_p)
+ {
+ n = max_generation;
+ *blocking_collection_p = TRUE;
+ settings.loh_compaction = TRUE;
+ dprintf (GTC_LOG, ("compacting LOH due to hard limit"));
+ }
+ }
+ }
+
+ if ((n == max_generation) && (*blocking_collection_p == FALSE))
+ {
+ // If we are doing a gen2 we should reset elevation regardless and let the gen2
+ // decide if we should lock again or in the bgc case by design we will not retract
+ // gen1 start.
+ settings.should_lock_elevation = FALSE;
+ settings.elevation_locked_count = 0;
+ dprintf (1, ("doing bgc, reset elevation"));
+ }
+
#ifdef STRESS_HEAP
#ifdef BACKGROUND_GC
// We can only do Concurrent GC Stress if the caller did not explicitly ask for all
// generations to be collected,
-
+ //
// [LOCALGC TODO] STRESS_HEAP is not defined for a standalone GC so there are multiple
// things that need to be fixed in this code block.
if (n_original != max_generation &&
return total_surv_size;
}
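+// Gets the total allocated on both SOH and LOH since the last GC and resets the per-heap counters.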
+size_t gc_heap::get_total_allocated_since_last_gc()
+{
+ size_t total_allocated_size = 0;
+#ifdef MULTIPLE_HEAPS
+ for (int i = 0; i < gc_heap::n_heaps; i++)
+ {
+ gc_heap* hp = gc_heap::g_heaps[i];
+ total_allocated_size += hp->allocated_since_last_gc;
+ hp->allocated_since_last_gc = 0;
+ }
+#else
+ total_allocated_size = allocated_since_last_gc;
+ allocated_since_last_gc = 0;
+#endif //MULTIPLE_HEAPS
+ return total_allocated_size;
+}
+
// Gets what's allocated on both SOH and LOH that hasn't been collected.
size_t gc_heap::get_current_allocated()
{
local_condemn_reasons->set_condition (gen_low_ephemeral_p);
dprintf (GTC_LOG, ("h%d: low eph", heap_number));
+ if (!provisional_mode_triggered)
+ {
#ifdef BACKGROUND_GC
- if (!gc_can_use_concurrent || (generation_free_list_space (generation_of (max_generation)) == 0))
+ if (!gc_can_use_concurrent || (generation_free_list_space (generation_of (max_generation)) == 0))
#endif //BACKGROUND_GC
- {
- //It is better to defragment first if we are running out of space for
- //the ephemeral generation but we have enough fragmentation to make up for it
- //in the non ephemeral generation. Essentially we are trading a gen2 for
- // having to expand heap in ephemeral collections.
- if (dt_high_frag_p (tuning_deciding_condemned_gen,
- max_generation - 1,
- TRUE))
{
- high_fragmentation = TRUE;
- local_condemn_reasons->set_condition (gen_max_high_frag_e_p);
- dprintf (GTC_LOG, ("heap%d: gen1 frag", heap_number));
+ //It is better to defragment first if we are running out of space for
+ //the ephemeral generation but we have enough fragmentation to make up for it
+ //in the non ephemeral generation. Essentially we are trading a gen2 for
+ // having to expand heap in ephemeral collections.
+ if (dt_high_frag_p (tuning_deciding_condemned_gen,
+ max_generation - 1,
+ TRUE))
+ {
+ high_fragmentation = TRUE;
+ local_condemn_reasons->set_condition (gen_max_high_frag_e_p);
+ dprintf (GTC_LOG, ("heap%d: gen1 frag", heap_number));
+ }
}
}
}
{
dprintf (GTC_LOG, ("h%d: expand_in_full - BLOCK", heap_number));
*blocking_collection_p = TRUE;
- if (!check_only_p)
- {
- should_expand_in_full_gc = FALSE;
- }
evaluate_elevation = FALSE;
n = max_generation;
local_condemn_reasons->set_condition (gen_expand_fullgc_p);
dprintf (GTC_LOG, ("h%d: alloc full - BLOCK", heap_number));
n = max_generation;
*blocking_collection_p = TRUE;
+
if ((local_settings->reason == reason_oos_loh) ||
(local_settings->reason == reason_alloc_loh))
+ {
evaluate_elevation = FALSE;
+ }
local_condemn_reasons->set_condition (gen_before_oom);
}
}
}
- if (evaluate_elevation && (low_ephemeral_space || high_memory_load || v_high_memory_load))
+ if (!provisional_mode_triggered && evaluate_elevation && (low_ephemeral_space || high_memory_load || v_high_memory_load))
{
*elevation_requested_p = TRUE;
#ifdef BIT64
#endif // BIT64
}
- if ((n == (max_generation - 1)) && (n_alloc < (max_generation -1)))
+ if (!provisional_mode_triggered && (n == (max_generation - 1)) && (n_alloc < (max_generation -1)))
{
dprintf (GTC_LOG, ("h%d: budget %d, check 2",
heap_number, n_alloc));
}
//figure out if max_generation is too fragmented -> blocking collection
- if (n == max_generation)
+ if (!provisional_mode_triggered && (n == max_generation))
{
if (dt_high_frag_p (tuning_deciding_condemned_gen, n))
{
}
}
- if (n == max_generation && GCToEEInterface::ForceFullGCToBeBlocking())
- {
-#ifdef BACKGROUND_GC
- // do not turn stress-induced collections into blocking GCs, unless there
- // have already been more full BGCs than full NGCs
-#if 0
- // This exposes DevDiv 94129, so we'll leave this out for now
- if (!settings.stress_induced ||
- full_gc_counts[gc_type_blocking] <= full_gc_counts[gc_type_background])
-#endif // 0
-#endif // BACKGROUND_GC
- {
- *blocking_collection_p = TRUE;
- }
- }
-
return n;
}
// if the memory load is higher, the threshold we'd want to collect gets lower.
size_t min_mem_based_on_available =
(500 - (settings.entry_memory_load - high_memory_load_th) * 40) * 1024 * 1024 / num_heaps;
+
size_t ten_percent_size = (size_t)((float)generation_size (max_generation) * 0.10);
uint64_t three_percent_mem = mem_one_percent * 3 / num_heaps;
int n = settings.condemned_generation;
+ if (settings.reason == reason_pm_full_gc)
+ {
+ assert (n == max_generation);
+ init_records();
+
+ gen_to_condemn_tuning* local_condemn_reasons = &(get_gc_data_per_heap()->gen_to_condemn_reasons);
+ local_condemn_reasons->init();
+ local_condemn_reasons->set_gen (gen_initial, n);
+ local_condemn_reasons->set_gen (gen_final_per_heap, n);
+ }
+
update_collection_counts ();
#ifdef BACKGROUND_GC
desired_per_heap = Align(smoothed_desired_per_heap, get_alignment_constant (true));
#endif //0
- // if desired_per_heap is close to min_gc_size, trim it
- // down to min_gc_size to stay in the cache
- gc_heap* hp = gc_heap::g_heaps[0];
- dynamic_data* dd = hp->dynamic_data_of (gen);
- size_t min_gc_size = dd_min_size(dd);
- // if min GC size larger than true on die cache, then don't bother
- // limiting the desired size
- if ((min_gc_size <= GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)) &&
- desired_per_heap <= 2*min_gc_size)
+ if (!heap_hard_limit)
{
- desired_per_heap = min_gc_size;
+ // if desired_per_heap is close to min_gc_size, trim it
+ // down to min_gc_size to stay in the cache
+ gc_heap* hp = gc_heap::g_heaps[0];
+ dynamic_data* dd = hp->dynamic_data_of (gen);
+ size_t min_gc_size = dd_min_size(dd);
+ // if min GC size larger than true on die cache, then don't bother
+ // limiting the desired size
+ if ((min_gc_size <= GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)) &&
+ desired_per_heap <= 2*min_gc_size)
+ {
+ desired_per_heap = min_gc_size;
+ }
}
#ifdef BIT64
desired_per_heap = joined_youngest_desired (desired_per_heap);
dprintf (2, ("final gen0 new_alloc: %Id", desired_per_heap));
#endif // BIT64
-
gc_data_global.final_youngest_desired = desired_per_heap;
}
#if 1 //subsumed by the linear allocation model
if (smoothing > loh_count)
smoothing = loh_count;
smoothed_desired_per_heap_loh = desired_per_heap / smoothing + ((smoothed_desired_per_heap_loh / smoothing) * (smoothing-1));
- dprintf( 2, ("smoothed_desired_per_heap_loh = %Id desired_per_heap = %Id", smoothed_desired_per_heap_loh, desired_per_heap));
+ dprintf (2, ("smoothed_desired_per_heap_loh = %Id desired_per_heap = %Id", smoothed_desired_per_heap_loh, desired_per_heap));
desired_per_heap = Align(smoothed_desired_per_heap_loh, get_alignment_constant (false));
}
#endif //0
#endif //FEATURE_LOH_COMPACTION
fire_pevents();
+ pm_full_gc_init_or_clear();
gc_t_join.restart();
}
do_post_gc();
}
+ pm_full_gc_init_or_clear();
+
#ifdef BACKGROUND_GC
recover_bgc_settings();
#endif //BACKGROUND_GC
void gc_heap::init_records()
{
+ // An option is to move this to be after we figure out which gen to condemn so we don't
+ // need to clear some generations' data 'cause we know they don't change, but that also means
+ // we can't simply call memset here.
memset (&gc_data_per_heap, 0, sizeof (gc_data_per_heap));
gc_data_per_heap.heap_index = heap_number;
if (heap_number == 0)
#ifdef GC_CONFIG_DRIVEN
memset (interesting_data_per_gc, 0, sizeof (interesting_data_per_gc));
#endif //GC_CONFIG_DRIVEN
+ memset (&fgm_result, 0, sizeof (fgm_result));
+
+ for (int i = 0; i <= (max_generation + 1); i++)
+ {
+ gc_data_per_heap.gen_data[i].size_before = generation_size (i);
+ generation* gen = generation_of (i);
+ gc_data_per_heap.gen_data[i].free_list_space_before = generation_free_list_space (gen);
+ gc_data_per_heap.gen_data[i].free_obj_space_before = generation_free_obj_space (gen);
+ }
+
+ sufficient_gen0_space_p = FALSE;
+
+#ifdef MULTIPLE_HEAPS
+ gen0_allocated_after_gc_p = false;
+#endif //MULTIPLE_HEAPS
+
+#if defined (_DEBUG) && defined (VERIFY_HEAP)
+ verify_pinned_queue_p = FALSE;
+#endif // _DEBUG && VERIFY_HEAP
}
-int gc_heap::garbage_collect (int n)
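+// Called at the end of a GC: either sets up the PM-triggered full blocking GC that runs next,
+// or clears the trigger once that full GC has finished.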
+void gc_heap::pm_full_gc_init_or_clear()
+{
+ // This means the next GC will be a full blocking GC and we need to init.
+ if (settings.condemned_generation == (max_generation - 1))
+ {
+ if (pm_trigger_full_gc)
+ {
+#ifdef MULTIPLE_HEAPS
+ do_post_gc();
+#endif //MULTIPLE_HEAPS
+ dprintf (GTC_LOG, ("init for PM triggered full GC"));
+ uint32_t saved_entry_memory_load = settings.entry_memory_load;
+ settings.init_mechanisms();
+ settings.reason = reason_pm_full_gc;
+ settings.condemned_generation = max_generation;
+ settings.entry_memory_load = saved_entry_memory_load;
+ // Can't assert this since we only check at the end of gen2 GCs,
+ // during gen1 the memory load could have already dropped.
+ // Although arguably we should just turn off PM then...
+ //assert (settings.entry_memory_load >= high_memory_load_th);
+ assert (settings.entry_memory_load > 0);
+ settings.gc_index += 1;
+ do_pre_gc();
+ }
+ }
+ // This means we are in the process of a full blocking GC triggered by
+ // this PM mode.
+ else if (settings.reason == reason_pm_full_gc)
+ {
+ assert (settings.condemned_generation == max_generation);
+ assert (pm_trigger_full_gc);
+ pm_trigger_full_gc = false;
+
+ dprintf (GTC_LOG, ("PM triggered full GC done"));
+ }
+}
+
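+// Runs the full blocking GC that provisional mode set up in pm_full_gc_init_or_clear.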
+void gc_heap::garbage_collect_pm_full_gc()
+{
+ assert (settings.condemned_generation == max_generation);
+ assert (settings.reason == reason_pm_full_gc);
+ assert (!settings.concurrent);
+ gc1();
+}
+
+void gc_heap::garbage_collect (int n)
{
//reset the number of alloc contexts
alloc_contexts_used = 0;
}
init_records();
- memset (&fgm_result, 0, sizeof (fgm_result));
settings.reason = gc_trigger_reason;
- verify_pinned_queue_p = FALSE;
-
#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE)
- num_pinned_objects = 0;
+ num_pinned_objects = 0;
#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE
#ifdef STRESS_HEAP
//align all heaps on the max generation to condemn
dprintf (3, ("Joining for max generation to condemn"));
condemned_generation_num = generation_to_condemn (n,
- &blocking_collection,
- &elevation_requested,
- FALSE);
+ &blocking_collection,
+ &elevation_requested,
+ FALSE);
gc_t_join.join(this, gc_join_generation_determined);
if (gc_t_join.joined())
#endif //MULTIPLE_HEAPS
{
-#ifdef MULTIPLE_HEAPS
#if !defined(SEG_MAPPING_TABLE) && !defined(FEATURE_BASICFREEZE)
//delete old slots from the segment table
seg_table->delete_old_slots();
#endif //!SEG_MAPPING_TABLE && !FEATURE_BASICFREEZE
+
+#ifdef MULTIPLE_HEAPS
for (int i = 0; i < n_heaps; i++)
{
- //copy the card and brick tables
- if (g_gc_card_table != g_heaps[i]->card_table)
- {
- g_heaps[i]->copy_brick_card_table();
- }
+ gc_heap* hp = g_heaps[i];
+ // check for card table growth
+ if (g_gc_card_table != hp->card_table)
+ hp->copy_brick_card_table();
- g_heaps[i]->rearrange_large_heap_segments();
- if (!recursive_gc_sync::background_running_p())
- {
- g_heaps[i]->rearrange_small_heap_segments();
- }
- }
-#else //MULTIPLE_HEAPS
+ hp->rearrange_large_heap_segments();
#ifdef BACKGROUND_GC
- //delete old slots from the segment table
-#if !defined(SEG_MAPPING_TABLE) && !defined(FEATURE_BASICFREEZE)
- seg_table->delete_old_slots();
-#endif //!SEG_MAPPING_TABLE && !FEATURE_BASICFREEZE
- rearrange_large_heap_segments();
+ hp->background_delay_delete_loh_segments();
if (!recursive_gc_sync::background_running_p())
- {
- rearrange_small_heap_segments();
- }
+ hp->rearrange_small_heap_segments();
#endif //BACKGROUND_GC
- // check for card table growth
+ }
+#else //MULTIPLE_HEAPS
if (g_gc_card_table != card_table)
copy_brick_card_table();
+ rearrange_large_heap_segments();
+#ifdef BACKGROUND_GC
+ background_delay_delete_loh_segments();
+ if (!recursive_gc_sync::background_running_p())
+ rearrange_small_heap_segments();
+#endif //BACKGROUND_GC
#endif //MULTIPLE_HEAPS
- BOOL should_evaluate_elevation = FALSE;
- BOOL should_do_blocking_collection = FALSE;
+ BOOL should_evaluate_elevation = TRUE;
+ BOOL should_do_blocking_collection = FALSE;
#ifdef MULTIPLE_HEAPS
- int gen_max = condemned_generation_num;
- for (int i = 0; i < n_heaps; i++)
- {
- if (gen_max < g_heaps[i]->condemned_generation_num)
- gen_max = g_heaps[i]->condemned_generation_num;
- if ((!should_evaluate_elevation) && (g_heaps[i]->elevation_requested))
- should_evaluate_elevation = TRUE;
- if ((!should_do_blocking_collection) && (g_heaps[i]->blocking_collection))
- should_do_blocking_collection = TRUE;
- }
+ int gen_max = condemned_generation_num;
+ for (int i = 0; i < n_heaps; i++)
+ {
+ if (gen_max < g_heaps[i]->condemned_generation_num)
+ gen_max = g_heaps[i]->condemned_generation_num;
+ if (should_evaluate_elevation && !(g_heaps[i]->elevation_requested))
+ should_evaluate_elevation = FALSE;
+ if ((!should_do_blocking_collection) && (g_heaps[i]->blocking_collection))
+ should_do_blocking_collection = TRUE;
+ }
- settings.condemned_generation = gen_max;
+ settings.condemned_generation = gen_max;
#else //MULTIPLE_HEAPS
- settings.condemned_generation = generation_to_condemn (n,
- &blocking_collection,
- &elevation_requested,
- FALSE);
- should_evaluate_elevation = elevation_requested;
- should_do_blocking_collection = blocking_collection;
-#endif //MULTIPLE_HEAPS
-
- settings.condemned_generation = joined_generation_to_condemn (
- should_evaluate_elevation,
- settings.condemned_generation,
- &should_do_blocking_collection
- STRESS_HEAP_ARG(n)
- );
+ settings.condemned_generation = generation_to_condemn (n,
+ &blocking_collection,
+ &elevation_requested,
+ FALSE);
+ should_evaluate_elevation = elevation_requested;
+ should_do_blocking_collection = blocking_collection;
+#endif //MULTIPLE_HEAPS
- STRESS_LOG1(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10,
- "condemned generation num: %d\n", settings.condemned_generation);
+ settings.condemned_generation = joined_generation_to_condemn (
+ should_evaluate_elevation,
+ n,
+ settings.condemned_generation,
+ &should_do_blocking_collection
+ STRESS_HEAP_ARG(n)
+ );
- record_gcs_during_no_gc();
+ STRESS_LOG1(LF_GCROOTS|LF_GC|LF_GCALLOC, LL_INFO10,
+ "condemned generation num: %d\n", settings.condemned_generation);
- if (settings.condemned_generation > 1)
- settings.promotion = TRUE;
+ record_gcs_during_no_gc();
+
+ if (settings.condemned_generation > 1)
+ settings.promotion = TRUE;
#ifdef HEAP_ANALYZE
- // At this point we've decided what generation is condemned
- // See if we've been requested to analyze survivors after the mark phase
- if (GCToEEInterface::AnalyzeSurvivorsRequested(settings.condemned_generation))
- {
- heap_analyze_enabled = TRUE;
- }
+ // At this point we've decided what generation is condemned
+ // See if we've been requested to analyze survivors after the mark phase
+ if (GCToEEInterface::AnalyzeSurvivorsRequested(settings.condemned_generation))
+ {
+ heap_analyze_enabled = TRUE;
+ }
#endif // HEAP_ANALYZE
GCToEEInterface::DiagGCStart(settings.condemned_generation, settings.reason == reason_induced);
// Call the EE for start of GC work
// just one thread for MP GC
GCToEEInterface::GcStartWork (settings.condemned_generation,
- max_generation);
+ max_generation);
// TODO: we could fire an ETW event to say this GC as a concurrent GC but later on due to not being able to
// create threads or whatever, this could be a non concurrent GC. Maybe for concurrent GC we should fire
#endif //MULTIPLE_HEAPS
}
- {
- int gen_num_for_data = max_generation + 1;
- for (int i = 0; i <= gen_num_for_data; i++)
- {
- gc_data_per_heap.gen_data[i].size_before = generation_size (i);
- generation* gen = generation_of (i);
- gc_data_per_heap.gen_data[i].free_list_space_before = generation_free_list_space (gen);
- gc_data_per_heap.gen_data[i].free_obj_space_before = generation_free_obj_space (gen);
- }
- }
- descr_generations (TRUE);
-// descr_card_table();
+ descr_generations (TRUE);
#ifdef VERIFY_HEAP
if ((GCConfig::GetHeapVerifyLevel() & GCConfig::HEAPVERIFY_GC) &&
if (settings.pause_mode == pause_no_gc)
allocate_for_no_gc_after_gc();
- int gn = settings.condemned_generation;
- return gn;
}
#define mark_stack_empty_p() (mark_stack_base == mark_stack_tos)
}
#endif //INTERIOR_POINTERS
+#ifdef MULTIPLE_HEAPS
+
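+// For server GC we don't maintain the slow/shigh bounds, so m_boundary only records into the mark list.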
+#ifdef MARK_LIST
+#ifdef GC_CONFIG_DRIVEN
+#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;}}
+#else //GC_CONFIG_DRIVEN
+#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;}}
+#endif //GC_CONFIG_DRIVEN
+#else //MARK_LIST
+#define m_boundary(o) {}
+#endif //MARK_LIST
+
+#define m_boundary_fullgc(o) {}
+
+#else //MULTIPLE_HEAPS
+
#ifdef MARK_LIST
#ifdef GC_CONFIG_DRIVEN
#define m_boundary(o) {if (mark_list_index <= mark_list_end) {*mark_list_index = o;mark_list_index++;} else {mark_list_index++;} if (slow > o) slow = o; if (shigh < o) shigh = o;}
#define m_boundary_fullgc(o) {if (slow > o) slow = o; if (shigh < o) shigh = o;}
+#endif //MULTIPLE_HEAPS
+
#define method_table(o) ((CObjectHeader*)(o))->GetMethodTable()
inline
record_interesting_data_point (idp_post_short_padded);
#endif //SHORT_PLUGS
m.set_post_short();
+#if defined (_DEBUG) && defined (VERIFY_HEAP)
verify_pinned_queue_p = TRUE;
+#endif // _DEBUG && VERIFY_HEAP
#ifdef COLLECTIBLE_CLASS
if (is_collectible (last_object_in_last_plug))
*mark_stack_tos = oo;
}
}
+
+ if (!contain_pointers (oo))
+ {
+ goto next_level;
+ }
}
#endif //COLLECTIBLE_CLASS
*(background_mark_stack_tos++) = class_obj;
}
}
+
+ if (!contain_pointers (oo))
+ {
+ goto next_level;
+ }
}
#endif //COLLECTIBLE_CLASS
}
#endif //SORT_MARK_STACK
+#ifdef COLLECTIBLE_CLASS
+next_level:
+#endif // COLLECTIBLE_CLASS
allow_fgc();
if (!(background_mark_stack_tos == background_mark_stack_array))
size_t total_fragmentation = 0;
#ifdef MULTIPLE_HEAPS
- for (int i = 0; i < gc_heap::n_heaps; i++)
+ for (int hn = 0; hn < gc_heap::n_heaps; hn++)
{
- gc_heap* hp = gc_heap::g_heaps[i];
+ gc_heap* hp = gc_heap::g_heaps[hn];
#else //MULTIPLE_HEAPS
{
gc_heap* hp = pGenGCHeap;
return total_fragmentation;
}
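+// Total free list + free obj space of the given generation across all heaps.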
+size_t gc_heap::get_total_gen_fragmentation (int gen_number)
+{
+ size_t total_fragmentation = 0;
+
+#ifdef MULTIPLE_HEAPS
+ for (int hn = 0; hn < gc_heap::n_heaps; hn++)
+ {
+ gc_heap* hp = gc_heap::g_heaps[hn];
+#else //MULTIPLE_HEAPS
+ {
+ gc_heap* hp = pGenGCHeap;
+#endif //MULTIPLE_HEAPS
+ generation* gen = hp->generation_of (gen_number);
+ total_fragmentation += (generation_free_list_space (gen) + generation_free_obj_space (gen));
+ }
+
+ return total_fragmentation;
+}
+
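+// Sum of each heap's estimated_reclaim for the given generation.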
+size_t gc_heap::get_total_gen_estimated_reclaim (int gen_number)
+{
+ size_t total_estimated_reclaim = 0;
+
+#ifdef MULTIPLE_HEAPS
+ for (int hn = 0; hn < gc_heap::n_heaps; hn++)
+ {
+ gc_heap* hp = gc_heap::g_heaps[hn];
+#else //MULTIPLE_HEAPS
+ {
+ gc_heap* hp = pGenGCHeap;
+#endif //MULTIPLE_HEAPS
+ total_estimated_reclaim += hp->estimated_reclaim (gen_number);
+ }
+
+ return total_estimated_reclaim;
+}
+
size_t gc_heap::committed_size()
{
generation* gen = generation_of (max_generation);
return total_committed;
}
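+// Committed and allocated bytes on either the LOH or the SOH segments of this heap.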
+size_t gc_heap::committed_size (bool loh_p, size_t* allocated)
+{
+ int gen_number = (loh_p ? (max_generation + 1) : max_generation);
+ generation* gen = generation_of (gen_number);
+ heap_segment* seg = heap_segment_rw (generation_start_segment (gen));
+ size_t total_committed = 0;
+ size_t total_allocated = 0;
+
+ while (seg)
+ {
+ total_committed += heap_segment_committed (seg) - (uint8_t*)seg;
+ total_allocated += heap_segment_allocated (seg) - (uint8_t*)seg;
+ seg = heap_segment_next (seg);
+ }
+
+ *allocated = total_allocated;
+ return total_committed;
+}
+
void gc_heap::get_memory_info (uint32_t* memory_load,
uint64_t* available_physical,
uint64_t* available_page_file)
{
#endif //MULTIPLE_HEAPS
+ maxgen_size_inc_p = false;
+
num_sizedrefs = GCToEEInterface::GetTotalNumSizedRefHandles();
#ifdef MULTIPLE_HEAPS
mark_list_index = &mark_list [0];
#endif //MARK_LIST
+#ifndef MULTIPLE_HEAPS
shigh = (uint8_t*) 0;
slow = MAX_PTR;
+#endif //MULTIPLE_HEAPS
//%type% category = quote (mark);
GCScan::GcWeakPtrScanBySingleThread (condemned_gen_number, max_generation, &sc);
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
- if (g_fEnableARM)
+ if (g_fEnableAppDomainMonitoring)
{
size_t promoted_all_heaps = 0;
#ifdef MULTIPLE_HEAPS
#else
promoted_all_heaps = promoted_bytes (heap_number);
#endif //MULTIPLE_HEAPS
- SystemDomain::RecordTotalSurvivedBytes (promoted_all_heaps);
+ GCToEEInterface::RecordTotalSurvivedBytes(promoted_all_heaps);
}
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING
generation_allocation_pointer (consing_gen) += allocation_left;
}
- dprintf (1, ("plan alloc gen%d(%Ix) start at %Ix (ptr: %Ix, limit: %Ix, next: %Ix)", gen->gen_num,
+ dprintf (2, ("plan alloc gen%d(%Ix) start at %Ix (ptr: %Ix, limit: %Ix, next: %Ix)", gen->gen_num,
generation_plan_allocation_start (gen),
generation_plan_allocation_start_size (gen),
generation_allocation_pointer (consing_gen), generation_allocation_limit (consing_gen),
if ((active_old_gen_number > 0) &&
(x >= generation_allocation_start (generation_of (active_old_gen_number - 1))))
{
- dprintf (1, ("crossing gen%d, x is %Ix", active_old_gen_number - 1, x));
+ dprintf (2, ("crossing gen%d, x is %Ix", active_old_gen_number - 1, x));
if (!pinned_plug_que_empty_p())
{
- dprintf (1, ("oldest pin: %Ix(%Id)",
+ dprintf (2, ("oldest pin: %Ix(%Id)",
pinned_plug (oldest_pin()),
(x - pinned_plug (oldest_pin()))));
}
plan_generation_start (generation_of (active_new_gen_number), consing_gen, x);
- dprintf (1, ("process eph: allocated gen%d start at %Ix",
+ dprintf (2, ("process eph: allocated gen%d start at %Ix",
active_new_gen_number,
generation_plan_allocation_start (generation_of (active_new_gen_number))));
}
}
-BOOL gc_heap::should_compact_loh()
+BOOL gc_heap::loh_compaction_requested()
{
+ // If a hard limit is specified, the GC will automatically decide if LOH needs to be compacted.
return (loh_compaction_always_p || (loh_compaction_mode != loh_compaction_default));
}
void gc_heap::compact_loh()
{
- assert (should_compact_loh());
+ assert (loh_compaction_requested() || heap_hard_limit);
generation* gen = large_object_generation;
heap_segment* start_seg = heap_segment_rw (generation_start_segment (gen));
#ifdef GC_CONFIG_DRIVEN
dprintf (3, ("total number of marked objects: %Id (%Id)",
(mark_list_index - &mark_list[0]), ((mark_list_end - &mark_list[0]))));
+
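+ // With GC_CONFIG_DRIVEN, m_boundary keeps incrementing mark_list_index past the end (to count all marked objects), so clamp it here.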
+ if (mark_list_index >= (mark_list_end + 1))
+ mark_list_index = mark_list_end + 1;
#else
dprintf (3, ("mark_list length: %Id",
(mark_list_index - &mark_list[0])));
generation_plan_allocation_start (temp_gen)));
}
- BOOL fire_pinned_plug_events_p = ETW_EVENT_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_Context, PinPlugAtGCTime);
+ BOOL fire_pinned_plug_events_p = EVENT_ENABLED(PinPlugAtGCTime);
size_t last_plug_len = 0;
while (1)
dprintf(4, ("+%Ix+", (size_t)xl));
assert ((size (xl) > 0));
- assert ((size (xl) <= LARGE_OBJECT_SIZE));
+ assert ((size (xl) <= loh_size_threshold));
last_object_in_plug = xl;
}
else
{
- allocate_in_condemned = TRUE;
+ if (generation_allocator(older_gen)->discard_if_no_fit_p())
+ {
+ allocate_in_condemned = TRUE;
+ }
new_address = allocate_in_condemned_generations (consing_gen, ps, active_old_gen_number,
#ifdef SHORT_PLUGS
if (pinned_plug_p)
{
if (fire_pinned_plug_events_p)
- FireEtwPinPlugAtGCTime(plug_start, plug_end,
- (merge_with_last_pin_p ? 0 : (uint8_t*)node_gap_size (plug_start)),
- GetClrInstanceId());
+ {
+ FIRE_EVENT(PinPlugAtGCTime, plug_start, plug_end,
+ (merge_with_last_pin_p ? 0 : (uint8_t*)node_gap_size (plug_start)));
+ }
if (merge_with_last_pin_p)
{
set_node_relocation_distance (plug_start, (new_address - plug_start));
if (last_node && (node_relocation_distance (last_node) ==
(node_relocation_distance (plug_start) +
- node_gap_size (plug_start))))
+ (ptrdiff_t)node_gap_size (plug_start))))
{
//dprintf(3,( " Lb"));
dprintf (3, ("%Ix Lb", plug_start));
size_t plan_gen2_size = generation_plan_size (max_generation);
size_t growth = plan_gen2_size - old_gen2_size;
+ generation* older_gen = generation_of (settings.condemned_generation + 1);
+ size_t rejected_free_space = generation_free_obj_space (older_gen) - r_free_obj_space;
+ size_t free_list_allocated = generation_free_list_allocated (older_gen) - r_older_gen_free_list_allocated;
+ size_t end_seg_allocated = generation_end_seg_allocated (older_gen) - r_older_gen_end_seg_allocated;
+ size_t condemned_allocated = generation_condemned_allocated (older_gen) - r_older_gen_condemned_allocated;
+
if (growth > 0)
{
- dprintf (1, ("gen2 grew %Id (end seg alloc: %Id, gen1 c alloc: %Id",
- growth, generation_end_seg_allocated (generation_of (max_generation)),
- generation_condemned_allocated (generation_of (max_generation - 1))));
+ dprintf (1, ("gen2 grew %Id (end seg alloc: %Id, condemned alloc: %Id",
+ growth, end_seg_allocated, condemned_allocated));
+
+ maxgen_size_inc_p = true;
}
else
{
- dprintf (1, ("gen2 shrank %Id (end seg alloc: %Id, gen1 c alloc: %Id",
- (old_gen2_size - plan_gen2_size), generation_end_seg_allocated (generation_of (max_generation)),
- generation_condemned_allocated (generation_of (max_generation - 1))));
+ dprintf (2, ("gen2 shrank %Id (end seg alloc: %Id, , condemned alloc: %Id, gen1 c alloc: %Id",
+ (old_gen2_size - plan_gen2_size), end_seg_allocated, condemned_allocated,
+ generation_condemned_allocated (generation_of (max_generation - 1))));
}
- generation* older_gen = generation_of (settings.condemned_generation + 1);
- size_t rejected_free_space = generation_free_obj_space (older_gen) - r_free_obj_space;
- size_t free_list_allocated = generation_free_list_allocated (older_gen) - r_older_gen_free_list_allocated;
- size_t end_seg_allocated = generation_end_seg_allocated (older_gen) - r_older_gen_end_seg_allocated;
- size_t condemned_allocated = generation_condemned_allocated (older_gen) - r_older_gen_condemned_allocated;
-
dprintf (1, ("older gen's free alloc: %Id->%Id, seg alloc: %Id->%Id, condemned alloc: %Id->%Id",
r_older_gen_free_list_allocated, generation_free_list_allocated (older_gen),
r_older_gen_end_seg_allocated, generation_end_seg_allocated (older_gen),
r_older_gen_condemned_allocated, generation_condemned_allocated (older_gen)));
- dprintf (1, ("this GC did %Id free list alloc(%Id bytes free space rejected), %Id seg alloc and %Id condemned alloc, gen1 condemned alloc is %Id",
- free_list_allocated, rejected_free_space, end_seg_allocated,
- condemned_allocated, generation_condemned_allocated (generation_of (settings.condemned_generation))));
+ dprintf (1, ("this GC did %Id free list alloc(%Id bytes free space rejected)",
+ free_list_allocated, rejected_free_space));
maxgen_size_increase* maxgen_size_info = &(get_gc_data_per_heap()->maxgen_size_info);
maxgen_size_info->free_list_allocated = free_list_allocated;
#ifdef BIT64
if ((!settings.concurrent) &&
+ !provisional_mode_triggered &&
((condemned_gen_number < max_generation) &&
((settings.gen0_reduction_count > 0) || (settings.entry_memory_load >= 95))))
{
- dprintf (2, ("gen0 reduction count is %d, condemning %d, mem load %d",
+ dprintf (GTC_LOG, ("gen0 reduction count is %d, condemning %d, mem load %d",
settings.gen0_reduction_count,
condemned_gen_number,
settings.entry_memory_load));
if ((condemned_gen_number >= (max_generation - 1)) &&
dt_low_ephemeral_space_p (tuning_deciding_expansion))
{
- dprintf (2, ("Not enough space for all ephemeral generations with compaction"));
+ dprintf (GTC_LOG, ("Not enough space for all ephemeral generations with compaction"));
should_expand = TRUE;
}
}
}
}
- settings.demotion = FALSE;
- int pol_max = policy_sweep;
+ if (maxgen_size_inc_p && provisional_mode_triggered)
+ {
+ pm_trigger_full_gc = true;
+ dprintf (GTC_LOG, ("in PM: maxgen size inc, doing a sweeping gen1 and trigger NGC2"));
+ }
+ else
+ {
+ settings.demotion = FALSE;
+ int pol_max = policy_sweep;
#ifdef GC_CONFIG_DRIVEN
- BOOL is_compaction_mandatory = FALSE;
+ BOOL is_compaction_mandatory = FALSE;
#endif //GC_CONFIG_DRIVEN
- int i;
- for (i = 0; i < n_heaps; i++)
- {
- if (pol_max < g_heaps[i]->gc_policy)
- pol_max = policy_compact;
- // set the demotion flag is any of the heap has demotion
- if (g_heaps[i]->demotion_high >= g_heaps[i]->demotion_low)
+ int i;
+ for (i = 0; i < n_heaps; i++)
{
- (g_heaps[i]->get_gc_data_per_heap())->set_mechanism_bit (gc_demotion_bit);
- settings.demotion = TRUE;
- }
+ if (pol_max < g_heaps[i]->gc_policy)
+ pol_max = policy_compact;
+ // set the demotion flag if any of the heaps has demotion
+ if (g_heaps[i]->demotion_high >= g_heaps[i]->demotion_low)
+ {
+ (g_heaps[i]->get_gc_data_per_heap())->set_mechanism_bit (gc_demotion_bit);
+ settings.demotion = TRUE;
+ }
#ifdef GC_CONFIG_DRIVEN
- if (!is_compaction_mandatory)
- {
- int compact_reason = (g_heaps[i]->get_gc_data_per_heap())->get_mechanism (gc_heap_compact);
- if (compact_reason >= 0)
+ if (!is_compaction_mandatory)
{
- if (gc_heap_compact_reason_mandatory_p[compact_reason])
- is_compaction_mandatory = TRUE;
+ int compact_reason = (g_heaps[i]->get_gc_data_per_heap())->get_mechanism (gc_heap_compact);
+ if (compact_reason >= 0)
+ {
+ if (gc_heap_compact_reason_mandatory_p[compact_reason])
+ is_compaction_mandatory = TRUE;
+ }
}
- }
#endif //GC_CONFIG_DRIVEN
- }
+ }
#ifdef GC_CONFIG_DRIVEN
- if (!is_compaction_mandatory)
- {
- // If compaction is not mandatory we can feel free to change it to a sweeping GC.
- // Note that we may want to change this to only checking every so often instead of every single GC.
- if (should_do_sweeping_gc (pol_max >= policy_compact))
- {
- pol_max = policy_sweep;
- }
- else
+ if (!is_compaction_mandatory)
{
- if (pol_max == policy_sweep)
- pol_max = policy_compact;
+ // If compaction is not mandatory we can feel free to change it to a sweeping GC.
+ // Note that we may want to change this to only checking every so often instead of every single GC.
+ if (should_do_sweeping_gc (pol_max >= policy_compact))
+ {
+ pol_max = policy_sweep;
+ }
+ else
+ {
+ if (pol_max == policy_sweep)
+ pol_max = policy_compact;
+ }
}
- }
#endif //GC_CONFIG_DRIVEN
- for (i = 0; i < n_heaps; i++)
- {
- if (pol_max > g_heaps[i]->gc_policy)
- g_heaps[i]->gc_policy = pol_max;
- //get the segment while we are serialized
- if (g_heaps[i]->gc_policy == policy_expand)
+ for (i = 0; i < n_heaps; i++)
{
- g_heaps[i]->new_heap_segment =
- g_heaps[i]->soh_get_segment_to_expand();
- if (!g_heaps[i]->new_heap_segment)
+ if (pol_max > g_heaps[i]->gc_policy)
+ g_heaps[i]->gc_policy = pol_max;
+ //get the segment while we are serialized
+ if (g_heaps[i]->gc_policy == policy_expand)
{
- set_expand_in_full_gc (condemned_gen_number);
- //we are out of memory, cancel the expansion
- g_heaps[i]->gc_policy = policy_compact;
+ g_heaps[i]->new_heap_segment =
+ g_heaps[i]->soh_get_segment_to_expand();
+ if (!g_heaps[i]->new_heap_segment)
+ {
+ set_expand_in_full_gc (condemned_gen_number);
+ //we are out of memory, cancel the expansion
+ g_heaps[i]->gc_policy = policy_compact;
+ }
}
}
- }
- BOOL is_full_compacting_gc = FALSE;
-
- if ((gc_policy >= policy_compact) && (condemned_gen_number == max_generation))
- {
- full_gc_counts[gc_type_compacting]++;
- is_full_compacting_gc = TRUE;
- }
+ BOOL is_full_compacting_gc = FALSE;
- for (i = 0; i < n_heaps; i++)
- {
- //copy the card and brick tables
- if (g_gc_card_table!= g_heaps[i]->card_table)
+ if ((gc_policy >= policy_compact) && (condemned_gen_number == max_generation))
{
- g_heaps[i]->copy_brick_card_table();
+ full_gc_counts[gc_type_compacting]++;
+ is_full_compacting_gc = TRUE;
}
- if (is_full_compacting_gc)
+ for (i = 0; i < n_heaps; i++)
{
- g_heaps[i]->loh_alloc_since_cg = 0;
+ //copy the card and brick tables
+ if (g_gc_card_table!= g_heaps[i]->card_table)
+ {
+ g_heaps[i]->copy_brick_card_table();
+ }
+
+ if (is_full_compacting_gc)
+ {
+ g_heaps[i]->loh_alloc_since_cg = 0;
+ }
}
}
rearrange_large_heap_segments ();
}
- settings.demotion = ((demotion_high >= demotion_low) ? TRUE : FALSE);
- if (settings.demotion)
- get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit);
+ if (maxgen_size_inc_p && provisional_mode_triggered)
+ {
+ pm_trigger_full_gc = true;
+ dprintf (GTC_LOG, ("in PM: maxgen size inc, doing a sweeping gen1 and trigger NGC2"));
+ }
+ else
+ {
+ settings.demotion = ((demotion_high >= demotion_low) ? TRUE : FALSE);
+ if (settings.demotion)
+ get_gc_data_per_heap()->set_mechanism_bit (gc_demotion_bit);
#ifdef GC_CONFIG_DRIVEN
- BOOL is_compaction_mandatory = FALSE;
- int compact_reason = get_gc_data_per_heap()->get_mechanism (gc_heap_compact);
- if (compact_reason >= 0)
- is_compaction_mandatory = gc_heap_compact_reason_mandatory_p[compact_reason];
+ BOOL is_compaction_mandatory = FALSE;
+ int compact_reason = get_gc_data_per_heap()->get_mechanism (gc_heap_compact);
+ if (compact_reason >= 0)
+ is_compaction_mandatory = gc_heap_compact_reason_mandatory_p[compact_reason];
+
+ if (!is_compaction_mandatory)
+ {
+ if (should_do_sweeping_gc (should_compact))
+ should_compact = FALSE;
+ else
+ should_compact = TRUE;
+ }
+#endif //GC_CONFIG_DRIVEN
- if (!is_compaction_mandatory)
+ if (should_compact && (condemned_gen_number == max_generation))
+ {
+ full_gc_counts[gc_type_compacting]++;
+ loh_alloc_since_cg = 0;
+ }
+ }
+#endif //MULTIPLE_HEAPS
+
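+ // Provisional mode stress: periodically force the gen1 -> full blocking GC transition.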
+ if (!pm_trigger_full_gc && pm_stress_on && provisional_mode_triggered)
{
- if (should_do_sweeping_gc (should_compact))
- should_compact = FALSE;
- else
- should_compact = TRUE;
+ if ((settings.condemned_generation == (max_generation - 1)) &&
+ ((settings.gc_index % 5) == 0))
+ {
+ pm_trigger_full_gc = true;
+ }
}
-#endif //GC_CONFIG_DRIVEN
- if (should_compact && (condemned_gen_number == max_generation))
+ if (settings.condemned_generation == (max_generation - 1))
{
- full_gc_counts[gc_type_compacting]++;
- loh_alloc_since_cg = 0;
+ if (provisional_mode_triggered)
+ {
+ if (should_expand)
+ {
+ should_expand = FALSE;
+ dprintf (GTC_LOG, ("h%d in PM cannot expand", heap_number));
+ }
+ }
+
+ if (pm_trigger_full_gc)
+ {
+ should_compact = FALSE;
+ dprintf (GTC_LOG, ("h%d PM doing sweeping", heap_number));
+ }
}
-#endif //MULTIPLE_HEAPS
if (should_compact)
{
#ifdef FEATURE_BASICFREEZE
bool gc_heap::frozen_object_p (Object* obj)
{
+#ifdef MULTIPLE_HEAPS
+#ifdef SEG_MAPPING_TABLE
+ heap_segment* pSegment = seg_mapping_table_segment_of((uint8_t*)obj);
+#else
+ ptrdiff_t delta = 0;
+ heap_segment* pSegment = segment_of ((uint8_t*)obj, delta);
+#endif
+#else //MULTIPLE_HEAPS
heap_segment* pSegment = gc_heap::find_segment ((uint8_t*)obj, FALSE);
_ASSERTE(pSegment);
+#endif //MULTIPLE_HEAPS
return heap_segment_read_only_p(pSegment);
}
while (x < plug_end)
{
- if (check_short_obj_p && ((plug_end - x) < min_pre_pin_obj_size))
+ if (check_short_obj_p && ((plug_end - x) < (DWORD)min_pre_pin_obj_size))
{
dprintf (3, ("last obj %Ix is short", x));
gc_heap* heap = (gc_heap*)arg;
if (!gc_thread_no_affinitize_p)
{
- GCThreadAffinity affinity;
- affinity.Group = GCThreadAffinity::None;
- affinity.Processor = GCThreadAffinity::None;
-
// We are about to set affinity for GC threads. It is a good place to set up NUMA and
// CPU groups because the process mask, processor number, and group number are all
// readily available.
- if (GCToOSInterface::CanEnableGCCPUGroups())
- set_thread_group_affinity_for_heap(heap->heap_number, &affinity);
- else
- set_thread_affinity_mask_for_heap(heap->heap_number, &affinity);
-
- if (!GCToOSInterface::SetThreadAffinity(&affinity))
- {
- dprintf(1, ("Failed to set thread affinity for server GC thread"));
- }
+ set_thread_affinity_for_heap(heap->heap_number);
}
// server GC threads run at a higher priority than normal.
size));
#endif //SIMPLE_DPRINTF
- if (GCToOSInterface::VirtualCommit (commit_start, size))
+ if (virtual_commit (commit_start, size))
{
// We can only verify the mark array is cleared from begin to end, the first and the last
// page aren't necessarily all cleared 'cause they could be used by other segments or
if (decommit_start < decommit_end)
{
- if (!GCToOSInterface::VirtualDecommit (decommit_start, size))
+ if (!virtual_decommit (decommit_start, size))
{
- dprintf (GC_TABLE_LOG, ("GCToOSInterface::VirtualDecommit on %Ix for %Id bytes failed",
+ dprintf (GC_TABLE_LOG, ("decommit on %Ix for %Id bytes failed",
decommit_start, size));
assert (!"decommit failed");
}
c_mark_list_index = 0;
+#ifndef MULTIPLE_HEAPS
shigh = (uint8_t*) 0;
slow = MAX_PTR;
+#endif //MULTIPLE_HEAPS
generation* gen = generation_of (max_generation);
#endif //_DEBUG
dprintf (3, ("Concurrent Background Promote %Ix", (size_t)o));
- if (o && (size (o) > LARGE_OBJECT_SIZE))
+ if (o && (size (o) > loh_size_threshold))
{
dprintf (3, ("Brc %Ix", (size_t)o));
}
// started and decided to do a BGC and waiting for a BGC thread to restart
// vm. That GC will be waiting in wait_to_proceed and we are waiting for it
// to restart the VM so we deadlock.
- //gc_heap::disable_preemptive (current_thread, TRUE);
+ //gc_heap::disable_preemptive (true);
}
FIRE_EVENT(GCTerminateConcurrentThread_V1);
dsttmp |= 1 << dstbit;
else
dsttmp &= ~(1 << dstbit);
-
if (!(++srcbit % 32))
{
srctmp = card_table[++srcwrd];
{
cardb++;
}
-
if (cardb == end_cardb)
return FALSE;
- // We found a bundle, so go through its words and find a non-zero card word
uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb),cardw)];
uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb+1),cardw_end)];
while ((card_word < card_word_end) && !(*card_word))
while (card_word < card_word_end)
{
- if (*card_word != 0)
+ if ((*card_word) != 0)
{
cardw = (card_word - &card_table [0]);
return TRUE;
card_word++;
}
-
return FALSE;
+
}
+
}
#endif //CARD_BUNDLE
{
++last_card_word;
}
- while ((last_card_word < &card_table [card_word_end]) && !(*last_card_word));
+ while ((last_card_word < &card_table [card_word_end]) && !(*last_card_word));
if (last_card_word < &card_table [card_word_end])
{
card_word_value = *last_card_word;
#endif //CARD_BUNDLE
}
+
// Look for the lowest bit set
if (card_word_value)
{
size_t n_eph = 0;
size_t n_gen = 0;
size_t n_card_set = 0;
- uint8_t* nhigh = (relocating ? heap_segment_plan_allocated (ephemeral_heap_segment) : high);
+ uint8_t* nhigh = (relocating ?
+ heap_segment_plan_allocated (ephemeral_heap_segment) : high);
BOOL foundp = FALSE;
uint8_t* start_address = 0;
{
if (card_of(last_object) > card)
{
- // cg means cross-generational
dprintf (3, ("Found %Id cg pointers", cg_pointers_found));
if (cg_pointers_found == 0)
{
if (card >= end_card)
{
- // Find the first card that's set (between card and card_word_end)
- foundp = find_card(card_table, card, card_word_end, end_card);
+ foundp = find_card (card_table, card, card_word_end, end_card);
if (foundp)
{
- // We found card(s) set.
n_card_set += end_card - card;
start_address = max (beg, card_address (card));
}
-
limit = min (end, card_address (end_card));
}
-
if (!foundp || (last_object >= end) || (card_address (card) >= end))
{
if (foundp && (cg_pointers_found == 0))
n_card_set -= (card_of (end) - card);
total_cards_cleared += (card_of (end) - card);
}
-
n_eph += cg_pointers_found;
cg_pointers_found = 0;
-
if ((seg = heap_segment_next_in_range (seg)) != 0)
{
#ifdef BACKGROUND_GC
}
}
- // We've found a card and will now go through the objects in it.
assert (card_set_p (card));
{
uint8_t* o = last_object;
+
o = find_first_object (start_address, last_object);
// Never visit an object twice.
assert (o >= last_object);
dd->sdata = sdata;
dd->min_size = sdata->min_size;
- dprintf (GTC_LOG, ("PM: %d - min: %Id, max: %Id, fr_l: %Id, fr_b: %d%%",
- settings.pause_mode,
- dd->min_size, dd_max_size,
- dd->fragmentation_limit, (int)(dd->fragmentation_burden_limit * 100)));
+ dprintf (GTC_LOG, ("PM: %d, gen%d: min: %Id, max: %Id, fr_l: %Id, fr_b: %d%%",
+ settings.pause_mode,i,
+ dd->min_size, dd_max_size (dd),
+ sdata->fragmentation_limit, (int)(sdata->fragmentation_burden_limit * 100)));
}
}
// Initialize the values that are not const.
void gc_heap::init_static_data()
{
- size_t gen0size = GCHeap::GetValidGen0MaxSize(get_valid_segment_size());
- size_t gen0_min_size = Align(gen0size / 8 * 5);
+ size_t gen0_min_size = get_gen0_min_size();
size_t gen0_max_size =
#ifdef MULTIPLE_HEAPS
max (6*1024*1024, min ( Align(soh_segment_size/2), 200*1024*1024)));
#endif //MULTIPLE_HEAPS
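+ // Under a hard limit, additionally cap the gen0 budget at a quarter of the SOH segment
+ // size, and honor an explicit GCGen0MaxBudget config value if one is given.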
+ if (heap_hard_limit)
+ {
+ size_t gen0_max_size_seg = soh_segment_size / 4;
+ dprintf (GTC_LOG, ("limit gen0 max %Id->%Id", gen0_max_size, gen0_max_size_seg));
+ gen0_max_size = min (gen0_max_size, gen0_max_size_seg);
+ }
+
+ size_t gen0_max_size_config = (size_t)GCConfig::GetGCGen0MaxBudget();
+
+ if (gen0_max_size_config)
+ {
+ gen0_max_size = min (gen0_max_size, gen0_max_size_config);
+ }
+
+ gen0_max_size = Align (gen0_max_size);
+
+ gen0_min_size = min (gen0_min_size, gen0_max_size);
+
// TODO: gen0_max_size has a 200mb cap; gen1_max_size should also have a cap.
- size_t gen1_max_size =
+ size_t gen1_max_size = (size_t)
#ifdef MULTIPLE_HEAPS
max (6*1024*1024, Align(soh_segment_size/2));
#else //MULTIPLE_HEAPS
max (6*1024*1024, Align(soh_segment_size/2)));
#endif //MULTIPLE_HEAPS
- dprintf (GTC_LOG, ("gen0size: %Id, gen0 min: %Id, max: %Id, gen1 max: %Id",
- gen0size, gen0_min_size, gen0_max_size, gen1_max_size));
+ dprintf (GTC_LOG, ("gen0 min: %Id, max: %Id, gen1 max: %Id",
+ gen0_min_size, gen0_max_size, gen1_max_size));
for (int i = latency_level_first; i <= latency_level_last; i++)
{
uint32_t memory_load = 0;
uint64_t available_physical = 0;
get_memory_info (&memory_load, &available_physical);
+#ifdef TRACE_GC
+ if (heap_hard_limit)
+ {
+ size_t loh_allocated = 0;
+ size_t loh_committed = committed_size (true, &loh_allocated);
+ dprintf (1, ("GC#%Id h%d, GMI: LOH budget, LOH commit %Id (obj %Id, frag %Id), total commit: %Id (recorded: %Id)",
+ (size_t)settings.gc_index, heap_number,
+ loh_committed, loh_allocated,
+ dd_fragmentation (dynamic_data_of (max_generation + 1)),
+ get_total_committed_size(), (current_total_committed - current_total_committed_bookkeeping)));
+ }
+#endif //TRACE_GC
if (heap_number == 0)
settings.exit_memory_load = memory_load;
if (available_physical > 1024*1024)
#ifdef SIMPLE_DPRINTF
dprintf (1, ("h%d g%d surv: %Id current: %Id alloc: %Id (%d%%) f: %d%% new-size: %Id new-alloc: %Id",
- heap_number, gen_number, out, current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)),
- (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation));
+ heap_number, gen_number, out, current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd)),
+ (int)(cst*100), (int)(f*100), current_size + new_allocation, new_allocation));
#else
dprintf (1,("gen: %d in: %Id out: %Id ", gen_number, generation_allocation_size (generation_of (gen_number)), out));
dprintf (1,("current: %Id alloc: %Id ", current_size, (dd_desired_allocation (dd) - dd_gc_new_allocation (dd))));
}
size_t slack_space = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment);
+
dynamic_data* dd = dynamic_data_of (0);
#ifndef MULTIPLE_HEAPS
current_gc_data_per_heap->extra_gen0_committed = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment);
}
-size_t gc_heap::new_allocation_limit (size_t size, size_t free_size, int gen_number)
-{
- dynamic_data* dd = dynamic_data_of (gen_number);
- ptrdiff_t new_alloc = dd_new_allocation (dd);
- assert (new_alloc == (ptrdiff_t)Align (new_alloc,
- get_alignment_constant (!(gen_number == (max_generation+1)))));
- size_t limit = min (max (new_alloc, (ptrdiff_t)size), (ptrdiff_t)free_size);
- assert (limit == Align (limit, get_alignment_constant (!(gen_number == (max_generation+1)))));
- dd_new_allocation (dd) = (new_alloc - limit );
- return limit;
-}
-
//This is meant to be called by decide_on_compacting.
size_t gc_heap::generation_fragmentation (generation* gen,
return result;
}
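+// Estimate how much space a GC of gen_number would free: the part of
+// (current size + allocation since the last GC) that is not expected to survive,
+// plus the generation's existing fragmentation.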
+size_t gc_heap::estimated_reclaim (int gen_number)
+{
+ dynamic_data* dd = dynamic_data_of (gen_number);
+ size_t gen_allocated = (dd_desired_allocation (dd) - dd_new_allocation (dd));
+ size_t gen_total_size = gen_allocated + dd_current_size (dd);
+ size_t est_gen_surv = (size_t)((float) (gen_total_size) * dd_surv (dd));
+ size_t est_gen_free = gen_total_size - est_gen_surv + dd_fragmentation (dd);
+
+ dprintf (GTC_LOG, ("h%d gen%d total size: %Id, est dead space: %Id (s: %d, allocated: %Id), frag: %Id",
+ heap_number, gen_number,
+ gen_total_size,
+ est_gen_free,
+ (int)(dd_surv (dd) * 100),
+ gen_allocated,
+ dd_fragmentation (dd)));
+
+ return est_gen_free;
+}
+
BOOL gc_heap::decide_on_compacting (int condemned_gen_number,
size_t fragmentation,
BOOL& should_expand)
float fragmentation_burden = ( ((0 == fragmentation) || (0 == gen_sizes)) ? (0.0f) :
(float (fragmentation) / gen_sizes) );
- dprintf (GTC_LOG, ("fragmentation: %Id (%d%%)", fragmentation, (int)(fragmentation_burden * 100.0)));
+ dprintf (GTC_LOG, ("h%d g%d fragmentation: %Id (%d%%)",
+ heap_number, settings.condemned_generation,
+ fragmentation, (int)(fragmentation_burden * 100.0)));
#ifdef STRESS_HEAP
// for pure GC stress runs we need compaction, for GC stress "mix"
get_gc_data_per_heap()->set_mechanism (gc_heap_compact, compact_induced_compacting);
}
+ if (settings.reason == reason_pm_full_gc)
+ {
+ assert (condemned_gen_number == max_generation);
+ if (heap_number == 0)
+ {
+ dprintf (GTC_LOG, ("PM doing compacting full GC after a gen1"));
+ }
+ should_compact = TRUE;
+ }
+
dprintf (2, ("Fragmentation: %d Fragmentation burden %d%%",
fragmentation, (int) (100*fragmentation_burden)));
+ if (provisional_mode_triggered && (condemned_gen_number == (max_generation - 1)))
+ {
+ dprintf (GTC_LOG, ("gen1 in PM always compact"));
+ should_compact = TRUE;
+ }
+
if (!should_compact)
{
if (dt_low_ephemeral_space_p (tuning_deciding_compaction))
#endif // MULTIPLE_HEAPS
ptrdiff_t reclaim_space = generation_size(max_generation) - generation_plan_size(max_generation);
+
if((settings.entry_memory_load >= high_memory_load_th) && (settings.entry_memory_load < v_high_memory_load_th))
{
if(reclaim_space > (int64_t)(min_high_fragmentation_threshold (entry_available_physical_mem, num_heaps)))
(generation_plan_allocation_start (generation_of (max_generation - 1)) >=
generation_allocation_start (generation_of (max_generation - 1))))
{
- dprintf (2, (" Elevation: gen2 size: %d, gen2 plan size: %d, no progress, elevation = locked",
+ dprintf (1, ("gen1 start %Ix->%Ix, gen2 size %Id->%Id, lock elevation",
+ generation_allocation_start (generation_of (max_generation - 1)),
+ generation_plan_allocation_start (generation_of (max_generation - 1)),
generation_size (max_generation),
generation_plan_size (max_generation)));
//no progress -> lock
}
}
- dprintf (2, ("will %s", (should_compact ? "compact" : "sweep")));
+ dprintf (2, ("will %s(%s)", (should_compact ? "compact" : "sweep"), (should_expand ? "ex" : "")));
return should_compact;
}
return max (2*dd_min_size (dd0), ((dd_desired_allocation (dd0)*2)/3));
}
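+// Returns whether the space between start and seg_end can satisfy end_space_required.
+// Under a hard limit we additionally require that the remaining commit budget, split
+// evenly across heaps, covers the requirement.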
+BOOL gc_heap::sufficient_space_end_seg (uint8_t* start, uint8_t* seg_end, size_t end_space_required, gc_tuning_point tp)
+{
+ BOOL can_fit = FALSE;
+ size_t end_seg_space = (size_t)(seg_end - start);
+ if (end_seg_space > end_space_required)
+ {
+ // If a hard limit is specified, treat what's left of the commit budget as the effective
+ // end of the ephemeral segment and check whether that still leaves enough space.
+ if (heap_hard_limit)
+ {
+ size_t left_in_commit = heap_hard_limit - current_total_committed;
+ int num_heaps = 1;
+#ifdef MULTIPLE_HEAPS
+ num_heaps = n_heaps;
+#endif //MULTIPLE_HEAPS
+ left_in_commit /= num_heaps;
+ if (left_in_commit > end_space_required)
+ {
+ can_fit = TRUE;
+ }
+
+ dprintf (2, ("h%d end seg %Id, but only %Id left in HARD LIMIT commit, required: %Id %s on eph (%d)",
+ heap_number, end_seg_space,
+ left_in_commit, end_space_required,
+ (can_fit ? "ok" : "short"), (int)tp));
+ }
+ else
+ can_fit = TRUE;
+ }
+
+ return can_fit;
+}
+
// After we did a GC we expect to have at least this
// much space at the end of the segment to satisfy
// a reasonable amount of allocation requests.
BOOL gc_heap::ephemeral_gen_fit_p (gc_tuning_point tp)
{
uint8_t* start = 0;
-
+
if ((tp == tuning_deciding_condemned_gen) ||
(tp == tuning_deciding_compaction))
{
assert (settings.condemned_generation >= (max_generation-1));
size_t gen0size = approximate_new_allocation();
size_t eph_size = gen0size;
+ size_t gen_min_sizes = 0;
for (int j = 1; j <= max_generation-1; j++)
{
- eph_size += 2*dd_min_size (dynamic_data_of(j));
+ gen_min_sizes += 2*dd_min_size (dynamic_data_of(j));
}
+
+ eph_size += gen_min_sizes;
+
+ dprintf (3, ("h%d deciding on expansion, need %Id (gen0: %Id, 2*min: %Id)",
+ heap_number, gen0size, gen_min_sizes, eph_size));
// We must find room for one large object and enough room for gen0size
if ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - start) > eph_size)
{
if (large_chunk_found)
{
+ sufficient_gen0_space_p = TRUE;
+
dprintf (3, ("Enough room"));
return TRUE;
}
if ((tp == tuning_deciding_condemned_gen) ||
(tp == tuning_deciding_full_gc))
{
- end_space = 2*dd_min_size (dd);
+ end_space = max (2*dd_min_size (dd), end_space_after_gc());
}
else
{
end_space = approximate_new_allocation();
}
- if (!((size_t)(heap_segment_reserved (ephemeral_heap_segment) - start) > end_space))
- {
- dprintf (GTC_LOG, ("ephemeral_gen_fit_p: does not fit without compaction"));
- }
- return ((size_t)(heap_segment_reserved (ephemeral_heap_segment) - start) > end_space);
+ BOOL can_fit = sufficient_space_end_seg (start, heap_segment_reserved (ephemeral_heap_segment), end_space, tp);
+
+ return can_fit;
}
}
acontext.set_alloc_heap(vm_heap);
#endif //MULTIPLE_HEAPS
-#ifdef MARK_ARRAY
- uint8_t* current_lowest_address = lowest_address;
- uint8_t* current_highest_address = highest_address;
-#ifdef BACKGROUND_GC
- if (recursive_gc_sync::background_running_p())
- {
- current_lowest_address = background_saved_lowest_address;
- current_highest_address = background_saved_highest_address;
- }
-#endif //BACKGROUND_GC
-#endif // MARK_ARRAY
-
- #if BIT64
+#if BIT64
size_t maxObjectSize = (INT64_MAX - 7 - Align(min_obj_size));
- #else
+#else
size_t maxObjectSize = (INT32_MAX - 7 - Align(min_obj_size));
- #endif
+#endif
if (jsize >= maxObjectSize)
{
#pragma inline_depth(20)
#endif //_MSC_VER
+#ifdef MARK_ARRAY
+ uint8_t* current_lowest_address = lowest_address;
+ uint8_t* current_highest_address = highest_address;
+#ifdef BACKGROUND_GC
+ if (recursive_gc_sync::background_running_p())
+ {
+ current_lowest_address = background_saved_lowest_address;
+ current_highest_address = background_saved_highest_address;
+ }
+#endif //BACKGROUND_GC
+#endif // MARK_ARRAY
+
#ifdef FEATURE_LOH_COMPACTION
// The GC allocator made a free object already in this alloc context and
// adjusted the alloc_ptr accordingly.
#ifdef BACKGROUND_GC
//the object has to cover one full mark uint32_t
assert (size > mark_word_size);
- if (current_c_gc_state == c_gc_state_marking)
+ if (current_c_gc_state != c_gc_state_free)
{
dprintf (3, ("Concurrent allocation of a large object %Ix",
(size_t)obj));
// We don't do unlock because there could be many processes using workstation GC and it's
// bad perf to have many threads doing unlock at the same time.
bool unlock_p = false;
-#endif // MULTIPLE_HEAPS
+#endif //MULTIPLE_HEAPS
reset_mm_p = GCToOSInterface::VirtualReset((void*)page_start, size, unlock_p);
}
return m;
}
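+// Delete the LOH segments that background sweep flagged with heap_segment_flags_loh_delete.
+// Deletion is deferred to here (while the EE is suspended) so the sweep doesn't race with
+// grow_brick_card_tables reading the segment list; see the comment in the sweep path below.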
+void gc_heap::background_delay_delete_loh_segments()
+{
+ generation* gen = large_object_generation;
+ heap_segment* seg = heap_segment_rw (generation_start_segment (large_object_generation));
+ heap_segment* prev_seg = 0;
+
+ while (seg)
+ {
+ heap_segment* next_seg = heap_segment_next (seg);
+ if (seg->flags & heap_segment_flags_loh_delete)
+ {
+ dprintf (3, ("deleting %Ix-%Ix-%Ix", (size_t)seg, heap_segment_allocated (seg), heap_segment_reserved (seg)));
+ delete_heap_segment (seg, (GCConfig::GetRetainVM() != 0));
+ heap_segment_next (prev_seg) = next_seg;
+ }
+ else
+ {
+ prev_seg = seg;
+ }
+
+ seg = next_seg;
+ }
+}
+
uint8_t* gc_heap::background_next_end (heap_segment* seg, BOOL large_objects_p)
{
return
dprintf (3, ("bgc sweep: deleting seg %Ix", seg));
if (gen == large_object_generation)
{
- heap_segment_next (prev_seg) = next_seg;
-
dprintf (3, ("Preparing empty large segment %Ix for deletion", (size_t)seg));
- heap_segment_next (seg) = freeable_large_heap_segment;
- freeable_large_heap_segment = seg;
+ // We cannot thread segs in here onto freeable_large_heap_segment because
+ // grow_brick_card_tables could be committing mark array which needs to read
+ // the seg list. So we delay it till next time we suspend EE.
+ seg->flags |= heap_segment_flags_loh_delete;
+ // Since we will be decommitting the seg, we need to prevent heap verification
+ // from verifying this segment.
+ heap_segment_allocated (seg) = heap_segment_mem (seg);
}
else
{
*delete_p = FALSE;
uint8_t* allocated = heap_segment_allocated (seg);
uint8_t* background_allocated = heap_segment_background_allocated (seg);
+ BOOL loh_p = heap_segment_loh_p (seg);
dprintf (3, ("Processing end of background segment [%Ix, %Ix[(%Ix[)",
(size_t)heap_segment_mem (seg), background_allocated, allocated));
-
- if (allocated != background_allocated)
+ if (!loh_p && (allocated != background_allocated))
{
- if (gen == large_object_generation)
- {
- FATAL_GC_ERROR();
- }
+ assert (gen != large_object_generation);
dprintf (3, ("Make a free object before newly promoted objects [%Ix, %Ix[",
(size_t)last_plug_end, background_allocated));
thread_gap (last_plug_end, background_allocated - last_plug_end, generation_of (max_generation));
+
fix_brick_to_highest (last_plug_end, background_allocated);
// When we allowed fgc's during going through gaps, we could have erased the brick
FIRE_EVENT(BGC2ndNonConEnd);
+ loh_alloc_thread_count = 0;
current_bgc_state = bgc_sweep_soh;
verify_soh_segment_list();
background_ephemeral_sweep();
+ concurrent_print_time_delta ("Swe eph");
+
#ifdef MULTIPLE_HEAPS
bgc_t_join.join(this, gc_join_after_ephemeral_sweep);
if (bgc_t_join.joined())
if (gen != large_object_generation)
{
dprintf (2, ("bgs: sweeping gen3 objects"));
+ concurrent_print_time_delta ("Swe SOH");
+ FIRE_EVENT(BGC1stSweepEnd, 0);
+
+ enter_spin_lock (&more_space_lock_loh);
+ add_saved_spinlock_info (true, me_acquire, mt_bgc_loh_sweep);
+
+ concurrent_print_time_delta ("Swe LOH took msl");
+
+ // We wait till all allocating threads are completely done.
+ int spin_count = yp_spin_count_unit;
+ while (loh_alloc_thread_count)
+ {
+ spin_and_switch (spin_count, (loh_alloc_thread_count == 0));
+ }
+
current_bgc_state = bgc_sweep_loh;
gen = generation_of (max_generation+1);
start_seg = heap_segment_rw (generation_start_segment (gen));
reset_seg = heap_segment_next_rw (reset_seg);
}
+ generation* loh_gen = generation_of (max_generation + 1);
+ generation_allocation_segment (loh_gen) = heap_segment_rw (generation_start_segment (loh_gen));
+
// We calculate dynamic data here because if we wait till we signal the lh event,
// the allocation thread can change the fragmentation and we may read an intermediate
// value (which can be greater than the generation size). Plus by that time it won't
gc_lh_block_event.Set();
}
+ add_saved_spinlock_info (true, me_release, mt_bgc_loh_sweep);
+ leave_spin_lock (&more_space_lock_loh);
+
//dprintf (GTC_LOG, ("---- (GC%d)End Background Sweep Phase ----", VolatileLoad(&settings.gc_index)));
dprintf (GTC_LOG, ("---- (GC%d)ESw ----", VolatileLoad(&settings.gc_index)));
}
if (current_join->joined())
#endif //MULTIPLE_HEAPS
{
- SyncBlockCache::GetSyncBlockCache()->VerifySyncTableEntry();
+ GCToEEInterface::VerifySyncTableEntry();
#ifdef MULTIPLE_HEAPS
current_join->restart();
#endif //MULTIPLE_HEAPS
}
//System wide initialization
-HRESULT GCHeap::Initialize ()
+HRESULT GCHeap::Initialize()
{
HRESULT hr = S_OK;
CreatedObjectCount = 0;
#endif //TRACE_GC
- size_t seg_size = get_valid_segment_size();
- gc_heap::soh_segment_size = seg_size;
- size_t large_seg_size = get_valid_segment_size(TRUE);
- gc_heap::min_loh_segment_size = large_seg_size;
- gc_heap::min_segment_size = min (seg_size, large_seg_size);
-#ifdef SEG_MAPPING_TABLE
- gc_heap::min_segment_size_shr = index_of_set_bit (gc_heap::min_segment_size);
-#endif //SEG_MAPPING_TABLE
+ bool is_restricted;
+ gc_heap::total_physical_mem = GCToOSInterface::GetPhysicalMemoryLimit (&is_restricted);
+
+#ifdef BIT64
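+ // Resolve the heap hard limit in this order: an explicit byte count from config, then a
+ // percentage of total physical memory, then (if the process is memory-restricted, e.g.
+ // running in a container) 75% of the restricted limit.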
+ gc_heap::heap_hard_limit = (size_t)GCConfig::GetGCHeapHardLimit();
+
+ if (!(gc_heap::heap_hard_limit))
+ {
+ uint32_t percent_of_mem = (uint32_t)GCConfig::GetGCHeapHardLimitPercent();
+ if ((percent_of_mem > 0) && (percent_of_mem < 100))
+ {
+ gc_heap::heap_hard_limit = (size_t)(gc_heap::total_physical_mem * (uint64_t)percent_of_mem / (uint64_t)100);
+ }
+ }
+
+ // If the hard limit is specified, the user is saying even if the process is already
+ // running in a container, use this limit for the GC heap.
+ if (!(gc_heap::heap_hard_limit))
+ {
+ if (is_restricted)
+ {
+ uint64_t physical_mem_for_gc = gc_heap::total_physical_mem * (uint64_t)75 / (uint64_t)100;
+ //printf ("returned physical mem %I64d, setting it to max (75%%: %I64d, 20mb)\n",
+ // gc_heap::total_physical_mem, physical_mem_for_gc);
+ gc_heap::heap_hard_limit = (size_t)max ((20 * 1024 * 1024), physical_mem_for_gc);
+ }
+ }
+
+ //printf ("heap_hard_limit is %Id, total physical mem: %Id, %s restricted\n",
+ // gc_heap::heap_hard_limit, gc_heap::total_physical_mem, (is_restricted ? "is" : "is not"));
+#endif //BIT64
+
+ uint32_t nhp = 1;
+ uint32_t nhp_from_config = 0;
#ifdef MULTIPLE_HEAPS
- if (GCConfig::GetNoAffinitize())
- gc_heap::gc_thread_no_affinitize_p = true;
+ AffinitySet config_affinity_set;
+ GCConfigStringHolder cpu_index_ranges_holder(GCConfig::GetGCHeapAffinitizeRanges());
- uint32_t nhp_from_config = static_cast<uint32_t>(GCConfig::GetHeapCount());
+ if (!ParseGCHeapAffinitizeRanges(cpu_index_ranges_holder.Get(), &config_affinity_set))
+ {
+ return CLR_E_GC_BAD_AFFINITY_CONFIG_FORMAT;
+ }
+
+ uintptr_t config_affinity_mask = static_cast<uintptr_t>(GCConfig::GetGCHeapAffinitizeMask());
+ const AffinitySet* process_affinity_set = GCToOSInterface::SetGCThreadsAffinitySet(config_affinity_mask, &config_affinity_set);
+
+ if (process_affinity_set->IsEmpty())
+ {
+ return CLR_E_GC_BAD_AFFINITY_CONFIG;
+ }
+
+ nhp_from_config = static_cast<uint32_t>(GCConfig::GetHeapCount());
uint32_t nhp_from_process = GCToOSInterface::GetCurrentProcessCpuCount();
- uint32_t nhp = ((nhp_from_config == 0) ? nhp_from_process :
- (min (nhp_from_config, nhp_from_process)));
+ if (nhp_from_config)
+ {
+ // Even when the user specifies a heap count, it should not be more
+ // than the number of procs this process can use.
+ nhp_from_config = min (nhp_from_config, nhp_from_process);
+ }
+
+ nhp = ((nhp_from_config == 0) ? nhp_from_process : nhp_from_config);
nhp = min (nhp, MAX_SUPPORTED_CPUS);
+#ifndef FEATURE_REDHAWK
+ gc_heap::gc_thread_no_affinitize_p = (gc_heap::heap_hard_limit ? false : (GCConfig::GetNoAffinitize() != 0));
+
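+ // Under a hard limit, affinitize GC threads only when the user explicitly supplied an
+ // affinity set or mask.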
+ if (gc_heap::heap_hard_limit)
+ {
+ gc_heap::gc_thread_no_affinitize_p = ((config_affinity_set.Count() == 0) && (config_affinity_mask == 0));
+ }
+
+ if (!(gc_heap::gc_thread_no_affinitize_p))
+ {
+ uint32_t num_affinitized_processors = (uint32_t)process_affinity_set->Count();
+
+ if (num_affinitized_processors != 0)
+ {
+ nhp = min(nhp, num_affinitized_processors);
+ }
+#ifdef FEATURE_PAL
+ // Limit the GC heaps to the number of processors available in the system.
+ nhp = min (nhp, GCToOSInterface::GetTotalProcessorCount());
+#endif // FEATURE_PAL
+ }
+#endif //!FEATURE_REDHAWK
+#endif //MULTIPLE_HEAPS
+
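+ // With a hard limit the segment size is derived from the limit and the heap count, and
+ // LOH segments are twice the SOH segment size; otherwise use the regular segment sizes.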
+ size_t seg_size = 0;
+ size_t large_seg_size = 0;
+ if (gc_heap::heap_hard_limit)
+ {
+ seg_size = gc_heap::get_segment_size_hard_limit (&nhp, (nhp_from_config == 0));
+ gc_heap::soh_segment_size = seg_size;
+ large_seg_size = seg_size * 2;
+ }
+ else
+ {
+ seg_size = get_valid_segment_size();
+ gc_heap::soh_segment_size = seg_size;
+ large_seg_size = get_valid_segment_size (TRUE);
+ }
+
+ dprintf (1, ("%d heaps, soh seg size: %Id mb, loh: %Id mb\n",
+ nhp,
+ (seg_size / (size_t)1024 / 1024),
+ (large_seg_size / 1024 / 1024)));
+
+ gc_heap::min_loh_segment_size = large_seg_size;
+ gc_heap::min_segment_size = min (seg_size, large_seg_size);
+#ifdef SEG_MAPPING_TABLE
+ gc_heap::min_segment_size_shr = index_of_highest_set_bit (gc_heap::min_segment_size);
+#endif //SEG_MAPPING_TABLE
+
+#ifdef MULTIPLE_HEAPS
+ gc_heap::n_heaps = nhp;
hr = gc_heap::initialize_gc (seg_size, large_seg_size /*LHEAP_ALLOC*/, nhp);
#else
hr = gc_heap::initialize_gc (seg_size, large_seg_size /*LHEAP_ALLOC*/);
if (hr != S_OK)
return hr;
- gc_heap::total_physical_mem = GCToOSInterface::GetPhysicalMemoryLimit();
-
gc_heap::mem_one_percent = gc_heap::total_physical_mem / 100;
#ifndef MULTIPLE_HEAPS
gc_heap::mem_one_percent /= g_num_processors;
#endif //!MULTIPLE_HEAPS
- // We should only use this if we are in the "many process" mode which really is only applicable
- // to very powerful machines - before that's implemented, temporarily I am only enabling this for 80GB+ memory.
- // For now I am using an estimate to calculate these numbers but this should really be obtained
- // programmatically going forward.
- // I am assuming 47 processes using WKS GC and 3 using SVR GC.
- // I am assuming 3 in part due to the "very high memory load" is 97%.
- int available_mem_th = 10;
- if (gc_heap::total_physical_mem >= ((uint64_t)80 * 1024 * 1024 * 1024))
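+ // A configured high-memory threshold overrides the default; the very-high threshold
+ // trails it by 7 points, with both capped at 99%.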
+ uint32_t highmem_th_from_config = (uint32_t)GCConfig::GetGCHighMemPercent();
+ if (highmem_th_from_config)
+ {
+ gc_heap::high_memory_load_th = min (99, highmem_th_from_config);
+ gc_heap::v_high_memory_load_th = min (99, (highmem_th_from_config + 7));
+ }
+ else
{
- int adjusted_available_mem_th = 3 + (int)((float)47 / (float)(g_num_processors));
- available_mem_th = min (available_mem_th, adjusted_available_mem_th);
+ // We should only use this if we are in the "many process" mode which really is only applicable
+ // to very powerful machines - before that's implemented, temporarily I am only enabling this for 80GB+ memory.
+ // For now I am using an estimate to calculate these numbers but this should really be obtained
+ // programmatically going forward.
+ // I am assuming 47 processes using WKS GC and 3 using SVR GC.
+ // I am assuming 3 in part due to the "very high memory load" is 97%.
+ int available_mem_th = 10;
+ if (gc_heap::total_physical_mem >= ((uint64_t)80 * 1024 * 1024 * 1024))
+ {
+ int adjusted_available_mem_th = 3 + (int)((float)47 / (float)(GCToOSInterface::GetTotalProcessorCount()));
+ available_mem_th = min (available_mem_th, adjusted_available_mem_th);
+ }
+
+ gc_heap::high_memory_load_th = 100 - available_mem_th;
+ gc_heap::v_high_memory_load_th = 97;
}
- gc_heap::high_memory_load_th = 100 - available_mem_th;
+ gc_heap::m_high_memory_load_th = min ((gc_heap::high_memory_load_th + 5), gc_heap::v_high_memory_load_th);
+
+ gc_heap::pm_stress_on = (GCConfig::GetGCProvModeStress() != 0);
#if defined(BIT64)
gc_heap::youngest_gen_desired_th = gc_heap::mem_one_percent;
#if defined (STRESS_HEAP) && !defined (MULTIPLE_HEAPS)
if (GCStress<cfg_any>::IsEnabled()) {
for(int i = 0; i < GCHeap::NUM_HEAP_STRESS_OBJS; i++)
+ {
m_StressObjs[i] = CreateGlobalHandle(0);
+ }
m_CurStressObj = 0;
}
#endif //STRESS_HEAP && !MULTIPLE_HEAPS
}
}
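+// Adjust yp_spin_count_unit, which sizes the GC's spin-wait loops, based on the given
+// scaling factor.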
+void GCHeap::SetYieldProcessorScalingFactor (float scalingFactor)
+{
+ assert (yp_spin_count_unit != 0);
+ int saved_yp_spin_count_unit = yp_spin_count_unit;
+ yp_spin_count_unit = (int)((float)yp_spin_count_unit * scalingFactor / (float)9);
+
+ // It's very suspicious if the scaled value becomes 0 (that would disable spinning),
+ // so fall back to the previous value.
+ if (yp_spin_count_unit == 0)
+ {
+ yp_spin_count_unit = saved_yp_spin_count_unit;
+ }
+}
+
unsigned int GCHeap::WhichGeneration (Object* object)
{
gc_heap* hp = gc_heap::heap_of ((uint8_t*)object);
#endif // VERIFY_HEAP
}
-#ifdef VERIFY_HEAP
-
-#ifdef FEATURE_BASICFREEZE
-BOOL GCHeap::IsInFrozenSegment (Object * object)
-{
- uint8_t* o = (uint8_t*)object;
- heap_segment * hs = gc_heap::find_segment (o, FALSE);
- //We create a frozen object for each frozen segment before the segment is inserted
- //to segment list; during ngen, we could also create frozen objects in segments which
- //don't belong to current GC heap.
- //So we return true if hs is NULL. It might create a hole about detecting invalidate
- //object. But given all other checks present, the hole should be very small
- return !hs || heap_segment_read_only_p (hs);
-}
-#endif //FEATURE_BASICFREEZE
-
-#endif //VERIFY_HEAP
-
// returns TRUE if the pointer is in one of the GC heaps.
bool GCHeap::IsHeapPointer (void* vpObject, bool small_heap_only)
{
- STATIC_CONTRACT_SO_TOLERANT;
-
// removed STATIC_CONTRACT_CAN_TAKE_LOCK here because find_segment
// no longer calls GCEvent::Wait which eventually takes a lock.
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
size_t promoted_size_end = hp->promoted_bytes (thread);
- if (g_fEnableARM)
+ if (g_fEnableAppDomainMonitoring)
{
if (sc->pCurrentDomain)
{
- sc->pCurrentDomain->RecordSurvivedBytes ((promoted_size_end - promoted_size_begin), thread);
+ GCToEEInterface::RecordSurvivedBytesForHeap((promoted_size_end - promoted_size_begin), thread, sc->pCurrentDomain);
}
}
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING
// For now we simply look at the size of the object to determine if it in the
// fixed heap or not. If the bit indicating this gets set at some point
// we should key off that instead.
- return size( pObj ) >= LARGE_OBJECT_SIZE;
+ return size( pObj ) >= loh_size_threshold;
}
#ifndef FEATURE_REDHAWK // Redhawk forces relocation a different way
while(HndFetchHandle(m_StressObjs[i]) == 0)
{
_ASSERTE(m_StressObjs[i] != 0);
- unsigned strLen = (LARGE_OBJECT_SIZE - 32) / sizeof(WCHAR);
+ unsigned strLen = ((unsigned)loh_size_threshold - 32) / sizeof(WCHAR);
unsigned strSize = PtrAlign(StringObject::GetSize(strLen));
// update the cached type handle before allocating
{
str->SetMethodTable (g_pStringClass);
str->SetStringLength (strLen);
-
HndAssignHandle(m_StressObjs[i], ObjectToOBJECTREF(str));
}
i = (i + 1) % NUM_HEAP_STRESS_OBJS;
#endif //COUNT_CYCLES
#endif //TRACE_GC
- if (size < LARGE_OBJECT_SIZE)
+ if (size < loh_size_threshold)
{
#ifdef TRACE_GC
AllocSmallCount++;
// support mis-aligned object headers so we can't support biased headers as above. Luckily for us
// we've managed to arrange things so the only case where we see a bias is for boxed value types and
// these can never get large enough to be allocated on the LOH.
- ASSERT(65536 < LARGE_OBJECT_SIZE);
+ ASSERT(65536 < loh_size_threshold);
ASSERT((flags & GC_ALLOC_ALIGN8_BIAS) == 0);
alloc_context* acontext = generation_alloc_context (hp->generation_of (max_generation+1));
#endif //_PREFAST_
#endif //MULTIPLE_HEAPS
- if (size < LARGE_OBJECT_SIZE)
+ if (size < loh_size_threshold)
{
#ifdef TRACE_GC
}
void
-GCHeap::FixAllocContext (gc_alloc_context* context, bool lockp, void* arg, void *heap)
+GCHeap::FixAllocContext (gc_alloc_context* context, void* arg, void *heap)
{
alloc_context* acontext = static_cast<alloc_context*>(context);
#ifdef MULTIPLE_HEAPS
if (heap == NULL || heap == hp)
{
- if (lockp)
- {
- enter_spin_lock (&hp->more_space_lock);
- }
hp->fix_allocation_context (acontext, ((arg != 0)? TRUE : FALSE),
- get_alignment_constant(TRUE));
- if (lockp)
- {
- leave_spin_lock (&hp->more_space_lock);
- }
+ get_alignment_constant(TRUE));
}
}
if (low_memory_p)
{
if (mode & collection_blocking)
+ {
reason = reason_lowmemory_blocking;
+ }
else
+ {
reason = reason_lowmemory;
+ }
}
else
+ {
reason = reason_induced;
+ }
if (reason == reason_induced)
{
settings.b_state = hp->current_bgc_state;
#endif //BACKGROUND_GC
+#ifdef TRACE_GC
+ size_t total_allocated_since_last_gc = get_total_allocated_since_last_gc();
#ifdef BACKGROUND_GC
- dprintf (1, ("*GC* %d(gen0:%d)(%d)(%s)(%d)",
+ dprintf (1, ("*GC* %d(gen0:%d)(%d)(alloc: %Id)(%s)(%d)",
VolatileLoad(&settings.gc_index),
dd_collection_count (hp->dynamic_data_of (0)),
settings.condemned_generation,
+ total_allocated_since_last_gc,
(settings.concurrent ? "BGC" : (recursive_gc_sync::background_running_p() ? "FGC" : "NGC")),
settings.b_state));
#else
- dprintf (1, ("*GC* %d(gen0:%d)(%d)",
+ dprintf (1, ("*GC* %d(gen0:%d)(%d)(alloc: %Id)",
VolatileLoad(&settings.gc_index),
dd_collection_count(hp->dynamic_data_of(0)),
- settings.condemned_generation));
+ settings.condemned_generation,
+ total_allocated_since_last_gc));
#endif //BACKGROUND_GC
+ if (heap_hard_limit)
+ {
+ size_t total_heap_committed = get_total_committed_size();
+ size_t total_heap_committed_recorded = current_total_committed - current_total_committed_bookkeeping;
+ dprintf (1, ("(%d)GC commit BEG #%Id: %Id (recorded: %Id)",
+ settings.condemned_generation,
+ (size_t)settings.gc_index, total_heap_committed, total_heap_committed_recorded));
+ }
+#endif //TRACE_GC
+
// TODO: this can happen...it's because of the way we are calling
// do_pre_gc, will fix later.
//if (last_gc_index > VolatileLoad(&settings.gc_index))
last_gc_index = VolatileLoad(&settings.gc_index);
GCHeap::UpdatePreGCCounters();
+#if defined(__linux__)
+ GCToEEInterface::UpdateGCEventStatus(static_cast<int>(GCEventStatus::GetEnabledLevel(GCEventProvider_Default)),
+ static_cast<int>(GCEventStatus::GetEnabledKeywords(GCEventProvider_Default)),
+ static_cast<int>(GCEventStatus::GetEnabledLevel(GCEventProvider_Private)),
+ static_cast<int>(GCEventStatus::GetEnabledKeywords(GCEventProvider_Private)));
+#endif // __linux__
if (settings.concurrent)
{
}
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
- if (g_fEnableARM)
+ if (g_fEnableAppDomainMonitoring)
{
- SystemDomain::ResetADSurvivedBytes();
+ GCToEEInterface::ResetTotalSurvivedBytes();
}
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING
}
}
#endif //GC_CONFIG_DRIVEN
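+// Provisional-mode heuristic: returns true when gen2 makes up more than half of the total
+// heap and more than 10% of gen2 is fragmentation, i.e. a full compacting GC is likely to
+// be productive.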
+bool gc_heap::is_pm_ratio_exceeded()
+{
+ size_t maxgen_frag = 0;
+ size_t maxgen_size = 0;
+ size_t total_heap_size = get_total_heap_size();
+
+#ifdef MULTIPLE_HEAPS
+ for (int i = 0; i < gc_heap::n_heaps; i++)
+ {
+ gc_heap* hp = gc_heap::g_heaps[i];
+#else //MULTIPLE_HEAPS
+ {
+ gc_heap* hp = pGenGCHeap;
+#endif //MULTIPLE_HEAPS
+
+ maxgen_frag += dd_fragmentation (hp->dynamic_data_of (max_generation));
+ maxgen_size += hp->generation_size (max_generation);
+ }
+
+ double maxgen_ratio = (double)maxgen_size / (double)total_heap_size;
+ double maxgen_frag_ratio = (double)maxgen_frag / (double)maxgen_size;
+ dprintf (GTC_LOG, ("maxgen %Id(%d%% total heap), frag: %Id (%d%% maxgen)",
+ maxgen_size, (int)(maxgen_ratio * 100.0),
+ maxgen_frag, (int)(maxgen_frag_ratio * 100.0)));
+
+ bool maxgen_highfrag_p = ((maxgen_ratio > 0.5) && (maxgen_frag_ratio > 0.1));
+
+ // We need to adjust elevation here: if there's enough gen2 fragmentation, another full
+ // GC is not unproductive, so don't lock out elevation.
+ if (maxgen_highfrag_p)
+ {
+ settings.should_lock_elevation = FALSE;
+ dprintf (GTC_LOG, ("high frag gen2, turn off elevation"));
+ }
+
+ return maxgen_highfrag_p;
+}
+
void gc_heap::do_post_gc()
{
if (!settings.concurrent)
last_gc_heap_size = get_total_heap_size();
last_gc_fragmentation = get_total_fragmentation();
+#ifdef TRACE_GC
+ if (heap_hard_limit)
+ {
+ size_t total_heap_committed = get_total_committed_size();
+ size_t total_heap_committed_recorded = current_total_committed - current_total_committed_bookkeeping;
+ dprintf (1, ("(%d)GC commit END #%Id: %Id (recorded: %Id), heap %Id, frag: %Id",
+ settings.condemned_generation,
+ (size_t)settings.gc_index, total_heap_committed, total_heap_committed_recorded,
+ last_gc_heap_size, last_gc_fragmentation));
+ }
+#endif //TRACE_GC
+
+ // Note we only do this at the end of full blocking GCs because we do not want
+ // to turn on this provisional mode during the middle of a BGC.
+ if ((settings.condemned_generation == max_generation) && (!settings.concurrent))
+ {
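+ // Under PM stress, provisional mode is toggled pseudo-randomly every few compacting
+ // full GCs. Otherwise it turns on when memory load is high and gen2 is large and
+ // fragmented, and turns off once either condition no longer holds.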
+ if (pm_stress_on)
+ {
+ size_t full_compacting_gc_count = full_gc_counts[gc_type_compacting];
+ if (provisional_mode_triggered)
+ {
+ uint64_t r = gc_rand::get_rand(10);
+ if ((full_compacting_gc_count - provisional_triggered_gc_count) >= r)
+ {
+ provisional_mode_triggered = false;
+ provisional_off_gc_count = full_compacting_gc_count;
+ dprintf (GTC_LOG, ("%Id NGC2s when turned on, %Id NGCs since(%Id)",
+ provisional_triggered_gc_count, (full_compacting_gc_count - provisional_triggered_gc_count),
+ num_provisional_triggered));
+ }
+ }
+ else
+ {
+ uint64_t r = gc_rand::get_rand(5);
+ if ((full_compacting_gc_count - provisional_off_gc_count) >= r)
+ {
+ provisional_mode_triggered = true;
+ provisional_triggered_gc_count = full_compacting_gc_count;
+ num_provisional_triggered++;
+ dprintf (GTC_LOG, ("%Id NGC2s when turned off, %Id NGCs since(%Id)",
+ provisional_off_gc_count, (full_compacting_gc_count - provisional_off_gc_count),
+ num_provisional_triggered));
+ }
+ }
+ }
+ else
+ {
+ if (provisional_mode_triggered)
+ {
+ if ((settings.entry_memory_load < high_memory_load_th) ||
+ !is_pm_ratio_exceeded())
+ {
+ dprintf (GTC_LOG, ("turning off PM"));
+ provisional_mode_triggered = false;
+ }
+ }
+ else if ((settings.entry_memory_load >= high_memory_load_th) && is_pm_ratio_exceeded())
+ {
+ dprintf (GTC_LOG, ("highmem && highfrag - turning on PM"));
+ provisional_mode_triggered = true;
+ num_provisional_triggered++;
+ }
+ }
+ }
+
GCHeap::UpdatePostGCCounters();
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
//if (g_fEnableARM)
#endif //COUNT_CYCLES
#endif //TRACE_GC
- gc_heap::g_low_memory_status = (reason == reason_lowmemory) ||
- (reason == reason_lowmemory_blocking) ||
- (gc_heap::latency_level == latency_level_memory_footprint);
+ gc_heap::g_low_memory_status = (reason == reason_lowmemory) ||
+ (reason == reason_lowmemory_blocking) ||
+ (gc_heap::latency_level == latency_level_memory_footprint);
- gc_trigger_reason = reason;
+ gc_trigger_reason = reason;
#ifdef MULTIPLE_HEAPS
for (int i = 0; i < gc_heap::n_heaps; i++)
condemned_generation_number = GcCondemnedGeneration;
#else
- if (gc_heap::proceed_with_gc_p)
- {
- BEGIN_TIMING(gc_during_log);
- pGenGCHeap->garbage_collect (condemned_generation_number);
- END_TIMING(gc_during_log);
- }
+ if (gc_heap::proceed_with_gc_p)
+ {
+ BEGIN_TIMING(gc_during_log);
+ pGenGCHeap->garbage_collect (condemned_generation_number);
+ if (gc_heap::pm_trigger_full_gc)
+ {
+ pGenGCHeap->garbage_collect_pm_full_gc();
+ }
+ END_TIMING(gc_during_log);
+ }
#endif //MULTIPLE_HEAPS
#ifdef TRACE_GC
int GCHeap::GetHomeHeapNumber ()
{
#ifdef MULTIPLE_HEAPS
- Thread *pThread = GCToEEInterface::GetThread();
- for (int i = 0; i < gc_heap::n_heaps; i++)
+ gc_alloc_context* ctx = GCToEEInterface::GetAllocContext();
+ if (!ctx)
{
- if (pThread)
- {
- gc_alloc_context* ctx = GCToEEInterface::GetAllocContext();
- GCHeap *hp = static_cast<alloc_context*>(ctx)->get_home_heap();
- if (hp == gc_heap::g_heaps[i]->vm_heap) return i;
- }
+ return 0;
}
- return 0;
+
+ GCHeap *hp = static_cast<alloc_context*>(ctx)->get_home_heap();
+ return (hp ? hp->pGenGCHeap->heap_number : 0);
#else
return 0;
#endif //MULTIPLE_HEAPS
#ifdef BACKGROUND_GC
gc_heap* hp = gc_heap::heap_of (Obj);
hp->bgc_alloc_lock->loh_alloc_done (Obj);
+ hp->bgc_untrack_loh_alloc();
#endif //BACKGROUND_GC
}
// Get the segment size to use, making sure it conforms.
size_t GCHeap::GetValidSegmentSize(bool large_seg)
{
- return get_valid_segment_size (large_seg);
+ return (large_seg ? gc_heap::min_loh_segment_size : gc_heap::soh_segment_size);
}
// Get the max gen0 heap size, making sure it conforms.
-size_t GCHeap::GetValidGen0MaxSize(size_t seg_size)
+size_t gc_heap::get_gen0_min_size()
{
size_t gen0size = static_cast<size_t>(GCConfig::GetGen0Size());
-
- if ((gen0size == 0) || !g_theGCHeap->IsValidGen0MaxSize(gen0size))
+ bool is_config_invalid = ((gen0size == 0) || !g_theGCHeap->IsValidGen0MaxSize(gen0size));
+ if (is_config_invalid)
{
#ifdef SERVER_GC
// performance data seems to indicate halving the size results
// if gen0 size is too large given the available memory, reduce it.
// Get true cache size, as we don't want to reduce below this.
size_t trueSize = max(GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE),(256*1024));
- dprintf (2, ("cache: %Id-%Id, cpu: %Id",
+ dprintf (1, ("cache: %Id-%Id",
GCToOSInterface::GetCacheSizePerLogicalCpu(FALSE),
GCToOSInterface::GetCacheSizePerLogicalCpu(TRUE)));
int n_heaps = 1;
#endif //SERVER_GC
+ dprintf (1, ("gen0size: %Id * %d = %Id, physical mem: %Id / 6 = %Id",
+ gen0size, n_heaps, (gen0size * n_heaps),
+ gc_heap::total_physical_mem,
+ gc_heap::total_physical_mem / 6));
+
// if the total min GC across heaps will exceed 1/6th of available memory,
// then reduce the min GC size until it either fits or has been reduced to cache size.
- while ((gen0size * n_heaps) > GCToOSInterface::GetPhysicalMemoryLimit() / 6)
+ while ((gen0size * n_heaps) > (gc_heap::total_physical_mem / 6))
{
gen0size = gen0size / 2;
if (gen0size <= trueSize)
}
}
+ size_t seg_size = gc_heap::soh_segment_size;
+ assert (seg_size);
+
// Generation 0 must never be more than 1/2 the segment size.
if (gen0size >= (seg_size / 2))
gen0size = seg_size / 2;
- return (gen0size);
+ // If the value from config is valid, we use it as is without this adjustment.
+ if (is_config_invalid)
+ {
+ if (heap_hard_limit)
+ {
+ size_t gen0size_seg = seg_size / 8;
+ if (gen0size >= gen0size_seg)
+ {
+ dprintf (1, ("gen0 limited by seg size %Id->%Id", gen0size, gen0size_seg));
+ gen0size = gen0size_seg;
+ }
+ }
+
+ gen0size = gen0size / 8 * 5;
+ }
+
+ gen0size = Align (gen0size);
+
+ return gen0size;
}
void GCHeap::SetReservedVMLimit (size_t vmlimit)
gc_heap::reserved_memory_limit = vmlimit;
}
-
//versions of same method on each heap
#ifdef FEATURE_PREMORTEM_FINALIZATION
unsigned int i = 0;
while (lock >= 0)
{
- YieldProcessor(); // indicate to the processor that we are spining
+ YieldProcessor(); // indicate to the processor that we are spinning
if (++i & 7)
GCToOSInterface::YieldThread (0);
else
//dprintf (3, ("scan freacheable %Ix", (size_t)o));
dprintf (3, ("scan f %Ix", (size_t)o));
#ifdef FEATURE_APPDOMAIN_RESOURCE_MONITORING
- if (g_fEnableARM)
+ if (g_fEnableAppDomainMonitoring)
{
- pSC->pCurrentDomain = SystemDomain::GetAppDomainAtIndex(o->GetAppDomainIndex());
+ pSC->pCurrentDomain = GCToEEInterface::GetAppDomainForObject(o);
}
#endif //FEATURE_APPDOMAIN_RESOURCE_MONITORING