From 1b827b5a82f8c6f8a9ed760ee127938dea5a7ea4 Mon Sep 17 00:00:00 2001
From: Sean Gillespie <sean@swgillespie.me>
Date: Thu, 16 Mar 2017 17:18:28 -0700
Subject: [PATCH] [Local GC] Break EE dependency on GC's generation table and
 alloc lock in single-proc scenarios (#10065)

* Remove usage of the generation table from the EE by introducing an
EE-owned GC alloc context used for allocations on single-proc machines.

* Move the GC alloc lock to the EE side of the interface

* Repair the Windows ARM build

* Move the decision to use per-thread alloc contexts to the EE

* Rename the lock used by StartNoGCRegion and EndNoGCRegion to be more indicative of what it is protecting

* Address code review feedback 2 (enumerate the global alloc context as a part of GCToEEInterface)

* Code review feedback (3)

* Address code review feedback (move some GC-internal globals to gcimpl.h and gc.cpp)

* g_global_alloc_lock is a dword, not a qword - fixes a deadlock

* Move GlobalAllocLock to gchelpers.cpp and switch to preemptive mode when spinning

* Repair the Windows x86 build
---
 .gitignore                       |   1 +
 src/gc/gc.cpp                    | 220 ++++++++-------------------------------
 src/gc/gc.h                      |   8 --
 src/gc/gccommon.cpp              |  17 ---
 src/gc/gcimpl.h                  |   8 +-
 src/gc/gcinterface.h             |  19 ++--
 src/gc/gcpriv.h                  |   8 --
 src/gc/sample/gcenv.ee.cpp       |   2 +
 src/vm/amd64/JitHelpers_Slow.asm |  68 ++++++------
 src/vm/amd64/asmconstants.h      |   6 ++
 src/vm/arm/stubs.cpp             |   2 +-
 src/vm/ceeload.cpp               |   3 +-
 src/vm/gcenv.ee.cpp              |  14 ++-
 src/vm/gcenv.ee.h                |   2 +-
 src/vm/gcheaputilities.cpp       |   3 +
 src/vm/gcheaputilities.h         |  35 ++++---
 src/vm/gchelpers.cpp             | 124 ++++++++++++++++++++--
 src/vm/i386/jitinterfacex86.cpp  |  29 +++---
 src/vm/jithelpers.cpp            |   8 +-
 src/vm/jitinterfacegen.cpp       |   2 +-
 20 files changed, 270 insertions(+), 309 deletions(-)

diff --git a/.gitignore b/.gitignore
index 60cd8dd..0e66b98 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ syntax: glob
 *.user
 *.userosscache
 *.sln.docstates
+*.swp
 
 # Build results
 [Dd]ebug/
diff --git a/src/gc/gc.cpp b/src/gc/gc.cpp
index 8e2f0bf..b4c0475 100644
--- a/src/gc/gc.cpp
+++ b/src/gc/gc.cpp
@@ -68,6 +68,24 @@ int compact_ratio = 0;
 // See comments in reset_memory.
 BOOL reset_mm_p = TRUE;
 
+bool g_fFinalizerRunOnShutDown = false;
+
+#ifdef FEATURE_SVR_GC
+bool g_built_with_svr_gc = true;
+#else
+bool g_built_with_svr_gc = false;
+#endif // FEATURE_SVR_GC
+
+#if defined(BUILDENV_DEBUG)
+uint8_t g_build_variant = 0;
+#elif defined(BUILDENV_CHECKED)
+uint8_t g_build_variant = 1;
+#else
+uint8_t g_build_variant = 2;
+#endif // defined(BUILDENV_DEBUG)
+
+VOLATILE(int32_t) g_no_gc_lock = -1;
+
 #if defined (TRACE_GC) && !defined (DACCESS_COMPILE)
 const char * const allocation_state_str[] = {
     "start",
@@ -93,7 +111,6 @@ const char * const allocation_state_str[] = {
 };
 #endif //TRACE_GC && !DACCESS_COMPILE
 
-
 // Keep this in sync with the definition of gc_reason
 #if (defined(DT_LOG) || defined(TRACE_GC)) && !defined (DACCESS_COMPILE)
 static const char* const str_gc_reasons[] = 
@@ -150,6 +167,7 @@ size_t GetHighPrecisionTimeStamp()
 }
 #endif
 
+
 #ifdef GC_STATS
 // There is a current and a prior copy of the statistics.  This allows us to display deltas per reporting
 // interval, as well as running totals.  The 'min' and 'max' values require special treatment.  They are
@@ -2685,10 +2703,6 @@ BOOL        gc_heap::heap_analyze_enabled = FALSE;
 
 #ifndef MULTIPLE_HEAPS
 
-extern "C" {
-    generation generation_table[NUMBERGENERATIONS + 1];
-}
-
 alloc_list gc_heap::loh_alloc_list [NUM_LOH_ALIST-1];
 alloc_list gc_heap::gen2_alloc_list[NUM_GEN2_ALIST-1];
 
@@ -2731,9 +2745,7 @@ int         gc_heap::gen0_must_clear_bricks = 0;
 CFinalize*  gc_heap::finalize_queue = 0;
 #endif // FEATURE_PREMORTEM_FINALIZATION
 
-#ifdef MULTIPLE_HEAPS
 generation gc_heap::generation_table [NUMBERGENERATIONS + 1];
-#endif // MULTIPLE_HEAPS
 
 size_t     gc_heap::interesting_data_per_heap[max_idp_count];
 
@@ -5716,18 +5728,13 @@ void gc_mechanisms::record (gc_history_global* history)
 //as opposed to concurrent heap verification
 void gc_heap::fix_youngest_allocation_area (BOOL for_gc_p)
 {
-    assert (alloc_allocated);
-    alloc_context* acontext = generation_alloc_context (youngest_generation);
-    dprintf (3, ("generation 0 alloc context: ptr: %Ix, limit %Ix",
-                 (size_t)acontext->alloc_ptr, (size_t)acontext->alloc_limit));
-    fix_allocation_context (acontext, for_gc_p, get_alignment_constant (TRUE));
-    if (for_gc_p)
-    {
-        acontext->alloc_ptr = alloc_allocated;
-        acontext->alloc_limit = acontext->alloc_ptr;
-    }
-    heap_segment_allocated (ephemeral_heap_segment) =
-        alloc_allocated;
+    UNREFERENCED_PARAMETER(for_gc_p);
+
+    // The gen 0 alloc context is never used for allocation in the allocator path. It's
+    // still used in the allocation path during GCs.
+    assert (generation_allocation_pointer (youngest_generation) == nullptr);
+    assert (generation_allocation_limit (youngest_generation) == nullptr);
+    heap_segment_allocated (ephemeral_heap_segment) = alloc_allocated;
 }
 
 void gc_heap::fix_large_allocation_area (BOOL for_gc_p)
@@ -5832,12 +5839,6 @@ void void_allocation (gc_alloc_context* acontext, void*)
 void gc_heap::repair_allocation_contexts (BOOL repair_p)
 {
     GCToEEInterface::GcEnumAllocContexts (repair_p ? repair_allocation : void_allocation, NULL);
-
-    alloc_context* acontext = generation_alloc_context (youngest_generation);
-    if (repair_p)
-        repair_allocation (acontext, NULL);
-    else
-        void_allocation (acontext, NULL);
 }
 
 struct fix_alloc_context_args
@@ -5857,8 +5858,8 @@ void gc_heap::fix_allocation_contexts(BOOL for_gc_p)
     fix_alloc_context_args args;
     args.for_gc_p = for_gc_p;
     args.heap = __this;
-    GCToEEInterface::GcEnumAllocContexts(fix_alloc_context, &args);
 
+    GCToEEInterface::GcEnumAllocContexts(fix_alloc_context, &args);
     fix_youngest_allocation_area(for_gc_p);
     fix_large_allocation_area(for_gc_p);
 }
@@ -14182,7 +14183,8 @@ uint8_t* gc_heap::allocate_in_condemned_generations (generation* gen,
         to_gen_number = from_gen_number + (settings.promotion ? 1 : 0);
     }
 
-    dprintf (3, ("aic gen%d: s: %Id", gen->gen_num, size));
+    dprintf (3, ("aic gen%d: s: %Id, %d->%d, %Ix->%Ix", gen->gen_num, size, from_gen_number, 
+          to_gen_number, generation_allocation_pointer(gen), generation_allocation_limit(gen)));
 
     int pad_in_front = (old_loc != 0) ? USE_PADDING_FRONT : 0;
 
@@ -32431,36 +32433,18 @@ int                 GCHeap::m_CurStressObj          = 0;
 #endif // FEATURE_REDHAWK
 
 #endif //FEATURE_PREMORTEM_FINALIZATION
-inline
-static void spin_lock ()
-{
-    enter_spin_lock_noinstru (&m_GCLock);
-}
-
-inline
-void EnterAllocLock()
-{
-    spin_lock();
-}
-
-inline
-void LeaveAllocLock()
-{
-    // Trick this out
-    leave_spin_lock_noinstru (&m_GCLock);
-}
 
-class AllocLockHolder
+class NoGCRegionLockHolder
 {
 public:
-    AllocLockHolder()
+    NoGCRegionLockHolder()
     {
-        EnterAllocLock();
+        enter_spin_lock_noinstru(&g_no_gc_lock);
     }
 
-    ~AllocLockHolder()
+    ~NoGCRegionLockHolder()
     {
-        LeaveAllocLock();
+        leave_spin_lock_noinstru(&g_no_gc_lock);
     }
 };
 
@@ -33612,14 +33596,6 @@ HRESULT GCHeap::Init(size_t hn)
 {
     HRESULT hres = S_OK;
 
-    //Initialize all of the instance members.
-
-#ifdef MULTIPLE_HEAPS
-    m_GCLock                = -1;
-#endif //MULTIPLE_HEAPS
-
-    // Rest of the initialization
-
 #ifdef MULTIPLE_HEAPS
     if ((pGenGCHeap = gc_heap::make_gc_heap(this, (int)hn)) == 0)
         hres = E_OUTOFMEMORY;
@@ -34323,122 +34299,7 @@ BOOL GCHeap::StressHeap(gc_alloc_context * context)
 // Small Object Allocator
 //
 //
-Object *
-GCHeap::Alloc( size_t size, uint32_t flags REQD_ALIGN_DCL)
-{
-    CONTRACTL {
-        NOTHROW;
-        GC_TRIGGERS;
-    } CONTRACTL_END;
-
-    TRIGGERSGC();
-
-    Object* newAlloc = NULL;
-
-#ifdef TRACE_GC
-#ifdef COUNT_CYCLES
-    AllocStart = GetCycleCount32();
-    unsigned finish;
-#elif defined(ENABLE_INSTRUMENTATION)
-    unsigned AllocStart = GetInstLogTime();
-    unsigned finish;
-#endif //COUNT_CYCLES
-#endif //TRACE_GC
-
-#ifdef MULTIPLE_HEAPS
-    //take the first heap....
-    gc_heap* hp = gc_heap::g_heaps[0];
-#else
-    gc_heap* hp = pGenGCHeap;
-#ifdef _PREFAST_
-    // prefix complains about us dereferencing hp in wks build even though we only access static members
-    // this way. not sure how to shut it up except for this ugly workaround:
-    PREFIX_ASSUME(hp != NULL);
-#endif //_PREFAST_
-#endif //MULTIPLE_HEAPS
-
-    {
-        AllocLockHolder lh;
-
-#ifndef FEATURE_REDHAWK
-        GCStress<gc_on_alloc>::MaybeTrigger(generation_alloc_context(hp->generation_of(0)));
-#endif // FEATURE_REDHAWK
-
-        alloc_context* acontext = 0;
-
-        if (size < LARGE_OBJECT_SIZE)
-        {
-            acontext = generation_alloc_context (hp->generation_of (0));
-
-#ifdef TRACE_GC
-            AllocSmallCount++;
-#endif //TRACE_GC
-            newAlloc = (Object*) hp->allocate (size + ComputeMaxStructAlignPad(requiredAlignment), acontext);
-#ifdef FEATURE_STRUCTALIGN
-            newAlloc = (Object*) hp->pad_for_alignment ((uint8_t*) newAlloc, requiredAlignment, size, acontext);
-#endif // FEATURE_STRUCTALIGN
-            // ASSERT (newAlloc);
-        }
-        else
-        {
-            acontext = generation_alloc_context (hp->generation_of (max_generation+1));
-
-            newAlloc = (Object*) hp->allocate_large_object (size + ComputeMaxStructAlignPadLarge(requiredAlignment), acontext->alloc_bytes_loh);
-#ifdef FEATURE_STRUCTALIGN
-            newAlloc = (Object*) hp->pad_for_alignment_large ((uint8_t*) newAlloc, requiredAlignment, size);
-#endif // FEATURE_STRUCTALIGN
-        }
-    }
-
-    CHECK_ALLOC_AND_POSSIBLY_REGISTER_FOR_FINALIZATION(newAlloc, size, flags & GC_ALLOC_FINALIZE);
-
-#ifdef TRACE_GC
-#ifdef COUNT_CYCLES
-    finish = GetCycleCount32();
-#elif defined(ENABLE_INSTRUMENTATION)
-    finish = GetInstLogTime();
-#endif //COUNT_CYCLES
-    AllocDuration += finish - AllocStart;
-    AllocCount++;
-#endif //TRACE_GC
-    return newAlloc;
-}
-
-// Allocate small object with an alignment requirement of 8-bytes. Non allocation context version.
-Object *
-GCHeap::AllocAlign8( size_t size, uint32_t flags)
-{
-#ifdef FEATURE_64BIT_ALIGNMENT
-    CONTRACTL {
-        NOTHROW;
-        GC_TRIGGERS;
-    } CONTRACTL_END;
-
-    Object* newAlloc = NULL;
-
-    {
-        AllocLockHolder lh;
-
-#ifdef MULTIPLE_HEAPS
-        //take the first heap....
-        gc_heap* hp = gc_heap::g_heaps[0];
-#else
-        gc_heap* hp = pGenGCHeap;
-#endif //MULTIPLE_HEAPS
-
-        newAlloc = AllocAlign8Common(hp, generation_alloc_context (hp->generation_of (0)), size, flags);
-    }
-
-    return newAlloc;
-#else
-    UNREFERENCED_PARAMETER(size);
-    UNREFERENCED_PARAMETER(flags);
-    assert(!"should not call GCHeap::AllocAlign8 without FEATURE_64BIT_ALIGNMENT defined!");
-    return nullptr;
-#endif  //FEATURE_64BIT_ALIGNMENT
-}
-
-// Allocate small object with an alignment requirement of 8-bytes. Allocation context version.
+// Allocate small object with an alignment requirement of 8-bytes.
 Object*
 GCHeap::AllocAlign8(gc_alloc_context* ctx, size_t size, uint32_t flags )
 {
@@ -35689,7 +35550,7 @@ int GCHeap::WaitForFullGCComplete(int millisecondsTimeout)
 
 int GCHeap::StartNoGCRegion(uint64_t totalSize, BOOL lohSizeKnown, uint64_t lohSize, BOOL disallowFullBlockingGC)
 {
-    AllocLockHolder lh;
+    NoGCRegionLockHolder lh;
 
     dprintf (1, ("begin no gc called"));
     start_no_gc_region_status status = gc_heap::prepare_for_no_gc_region (totalSize, lohSizeKnown, lohSize, disallowFullBlockingGC);
@@ -35707,7 +35568,7 @@ int GCHeap::StartNoGCRegion(uint64_t totalSize, BOOL lohSizeKnown, uint64_t lohS
 
 int GCHeap::EndNoGCRegion()
 {
-    AllocLockHolder lh;
+    NoGCRegionLockHolder lh;
     return (int)gc_heap::end_no_gc_region();
 }
 
@@ -37054,6 +36915,11 @@ BOOL GCHeap::IsConcurrentGCEnabled()
 #endif //BACKGROUND_GC
 }
 
+void GCHeap::SetFinalizeRunOnShutdown(bool value)
+{
+    g_fFinalizerRunOnShutDown = value;
+}
+
 void PopulateDacVars(GcDacVars *gcDacVars)
 {
 #ifndef DACCESS_COMPILE
@@ -37081,7 +36947,7 @@ void PopulateDacVars(GcDacVars *gcDacVars)
     gcDacVars->next_sweep_obj = &gc_heap::next_sweep_obj;
     gcDacVars->oom_info = &gc_heap::oom_info;
     gcDacVars->finalize_queue = reinterpret_cast<dac_finalize_queue**>(&gc_heap::finalize_queue);
-    gcDacVars->generation_table = reinterpret_cast<dac_generation**>(&generation_table);
+    gcDacVars->generation_table = reinterpret_cast<dac_generation**>(&gc_heap::generation_table);
 #ifdef GC_CONFIG_DRIVEN
     gcDacVars->gc_global_mechanisms = reinterpret_cast<size_t**>(&gc_global_mechanisms);
     gcDacVars->interesting_data_per_heap = reinterpret_cast<size_t**>(&gc_heap::interesting_data_per_heap);
diff --git a/src/gc/gc.h b/src/gc/gc.h
index 0469566..e9b52a7 100644
--- a/src/gc/gc.h
+++ b/src/gc/gc.h
@@ -105,9 +105,6 @@ extern "C" uint32_t* g_gc_card_bundle_table;
 extern "C" uint32_t* g_gc_card_table;
 extern "C" uint8_t* g_gc_lowest_address;
 extern "C" uint8_t* g_gc_highest_address;
-extern "C" bool g_fFinalizerRunOnShutDown;
-extern "C" bool g_built_with_svr_gc;
-extern "C" uint8_t g_build_variant;
 
 namespace WKS {
     ::IGCHeapInternal* CreateGCHeap();
@@ -236,11 +233,6 @@ public:
         return mt->GetBaseSize() >= LARGE_OBJECT_SIZE;
     }
 
-    void SetFinalizeRunOnShutdown(bool value)
-    {
-        g_fFinalizerRunOnShutDown = value;
-    }
-
 protected: 
 public:
 #if defined(FEATURE_BASICFREEZE) && defined(VERIFY_HEAP)
diff --git a/src/gc/gccommon.cpp b/src/gc/gccommon.cpp
index 29c4b8d..c176051 100644
--- a/src/gc/gccommon.cpp
+++ b/src/gc/gccommon.cpp
@@ -46,23 +46,6 @@ uint32_t* g_gc_card_bundle_table;
 
 uint8_t* g_gc_lowest_address  = 0;
 uint8_t* g_gc_highest_address = 0;
-bool g_fFinalizerRunOnShutDown = false;
-
-#ifdef FEATURE_SVR_GC
-bool g_built_with_svr_gc = true;
-#else
-bool g_built_with_svr_gc = false;
-#endif // FEATURE_SVR_GC
-
-#if defined(BUILDENV_DEBUG)
-uint8_t g_build_variant = 0;
-#elif defined(BUILDENV_CHECKED)
-uint8_t g_build_variant = 1;
-#else
-uint8_t g_build_variant = 2;
-#endif // defined(BUILDENV_DEBUG)
-
-VOLATILE(int32_t) m_GCLock = -1;
 
 #ifdef GC_CONFIG_DRIVEN
 void record_global_mechanism (int mech_index)
diff --git a/src/gc/gcimpl.h b/src/gc/gcimpl.h
index cb91c4d..67d84b9 100644
--- a/src/gc/gcimpl.h
+++ b/src/gc/gcimpl.h
@@ -39,6 +39,11 @@ void GCProfileWalkHeap();
 class gc_heap;
 class CFinalize;
 
+extern bool g_fFinalizerRunOnShutDown;
+extern bool g_built_with_svr_gc;
+extern uint8_t g_build_variant;
+extern VOLATILE(int32_t) g_no_gc_lock;
+
 class GCHeap : public IGCHeapInternal
 {
 protected:
@@ -91,8 +96,6 @@ public:
     HRESULT Initialize ();
 
     //flags can be GC_ALLOC_CONTAINS_REF GC_ALLOC_FINALIZE
-    Object*  Alloc (size_t size, uint32_t flags);
-    Object*  AllocAlign8 (size_t size, uint32_t flags);
     Object*  AllocAlign8 (gc_alloc_context* acontext, size_t size, uint32_t flags);
 private:
     Object*  AllocAlign8Common (void* hp, alloc_context* acontext, size_t size, uint32_t flags);
@@ -199,6 +202,7 @@ public:
     BOOL ShouldRestartFinalizerWatchDog();
 
     void DiagWalkObject (Object* obj, walk_fn fn, void* context);
+    void SetFinalizeRunOnShutdown(bool value);
 
 public:	// FIX 
 
diff --git a/src/gc/gcinterface.h b/src/gc/gcinterface.h
index 9f88d7b..42cdcc0 100644
--- a/src/gc/gcinterface.h
+++ b/src/gc/gcinterface.h
@@ -194,8 +194,6 @@ extern uint8_t* g_shadow_lowest_address;
 // For low memory notification from host
 extern int32_t g_bLowMemoryFromHost;
 
-extern VOLATILE(int32_t) m_GCLock;
-
 // !!!!!!!!!!!!!!!!!!!!!!!
 // make sure you change the def in bcl\system\gc.cs 
 // if you change this!
@@ -610,21 +608,22 @@ public:
     */
 
     // Allocates an object on the given allocation context with the given size and flags.
+    // It is the responsibility of the caller to ensure that the passed-in alloc context is
+    // owned by the thread that is calling this function. If using per-thread alloc contexts,
+    // no lock is needed; callers not using per-thread alloc contexts will need to acquire
+    // a lock to ensure that the calling thread has unique ownership over this alloc context;
     virtual Object* Alloc(gc_alloc_context* acontext, size_t size, uint32_t flags) = 0;
 
-    // Allocates an object on the default allocation context with the given size and flags.
-    virtual Object* Alloc(size_t size, uint32_t flags) = 0;
-
     // Allocates an object on the large object heap with the given size and flags.
     virtual Object* AllocLHeap(size_t size, uint32_t flags) = 0;
 
-    // Allocates an object on the default allocation context, aligned to 64 bits,
-    // with the given size and flags.
-    virtual Object* AllocAlign8 (size_t size, uint32_t flags) = 0;
-
     // Allocates an object on the given allocation context, aligned to 64 bits,
     // with the given size and flags.
-    virtual Object* AllocAlign8 (gc_alloc_context* acontext, size_t size, uint32_t flags) = 0;
+    // It is the responsibility of the caller to ensure that the passed-in alloc context is
+    // owned by the thread that is calling this function. If using per-thread alloc contexts,
+    // no lock is needed; callers not using per-thread alloc contexts will need to acquire
+    // a lock to ensure that the calling thread has unique ownership over this alloc context.
+    virtual Object* AllocAlign8(gc_alloc_context* acontext, size_t size, uint32_t flags) = 0;
 
     // This is for the allocator to indicate it's done allocating a large object during a 
     // background GC as the BGC threads also need to walk LOH.
diff --git a/src/gc/gcpriv.h b/src/gc/gcpriv.h
index d06902c..b929198 100644
--- a/src/gc/gcpriv.h
+++ b/src/gc/gcpriv.h
@@ -2810,11 +2810,9 @@ public:
     PER_HEAP
     BOOL heap_analyze_success;
 
-#ifdef MULTIPLE_HEAPS
     // The generation table. Must always be last.
     PER_HEAP
     generation generation_table [NUMBERGENERATIONS + 1];
-#endif // MULTIPLE_HEAPS
 
     // End DAC zone
 
@@ -4291,12 +4289,6 @@ gc_heap*& heap_segment_heap (heap_segment* inst)
 }
 #endif //MULTIPLE_HEAPS
 
-#ifndef MULTIPLE_HEAPS
-extern "C" {
-    extern generation generation_table[NUMBERGENERATIONS + 1];
-}
-#endif // MULTIPLE_HEAPS
-
 inline
 generation* gc_heap::generation_of (int  n)
 {
diff --git a/src/gc/sample/gcenv.ee.cpp b/src/gc/sample/gcenv.ee.cpp
index e95a78d..8403bba 100644
--- a/src/gc/sample/gcenv.ee.cpp
+++ b/src/gc/sample/gcenv.ee.cpp
@@ -15,6 +15,8 @@ int32_t g_TrapReturningThreads;
 
 EEConfig * g_pConfig;
 
+gc_alloc_context g_global_alloc_context;
+
 bool CLREventStatic::CreateManualEventNoThrow(bool bInitialState)
 {
     m_hEvent = CreateEventW(NULL, TRUE, bInitialState, NULL);
diff --git a/src/vm/amd64/JitHelpers_Slow.asm b/src/vm/amd64/JitHelpers_Slow.asm
index 7deed49..293e447 100644
--- a/src/vm/amd64/JitHelpers_Slow.asm
+++ b/src/vm/amd64/JitHelpers_Slow.asm
@@ -467,13 +467,9 @@ NESTED_END JIT_NewArr1OBJ_MP, _TEXT
 
 
 
-; <TODO> this m_GCLock should be a size_t so we don't have a store-forwarding penalty in the code below.
-;        Unfortunately, the compiler intrinsic for InterlockedExchangePointer seems to be broken and we
-;        get bad code gen in gc.cpp on IA64. </TODO>
 
-M_GCLOCK equ ?m_GCLock@@3HC
-extern M_GCLOCK:dword
-extern generation_table:qword
+extern g_global_alloc_lock:dword
+extern g_global_alloc_context:qword
 
 LEAF_ENTRY JIT_TrialAllocSFastSP, _TEXT
 
@@ -481,20 +477,20 @@ LEAF_ENTRY JIT_TrialAllocSFastSP, _TEXT
 
         ; m_BaseSize is guaranteed to be a multiple of 8.
 
-        inc     [M_GCLOCK]
+        inc     [g_global_alloc_lock]
         jnz     JIT_NEW
 
-        mov     rax, [generation_table + 0]     ; alloc_ptr
-        mov     r10, [generation_table + 8]     ; limit_ptr
+        mov     rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr]       ; alloc_ptr
+        mov     r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit]     ; limit_ptr
 
         add     r8, rax
 
         cmp     r8, r10
         ja      AllocFailed
 
-        mov     qword ptr [generation_table + 0], r8     ; update the alloc ptr
+        mov     qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8     ; update the alloc ptr
         mov     [rax], rcx
-        mov     [M_GCLOCK], -1
+        mov     [g_global_alloc_lock], -1
 
 ifdef _DEBUG
         call    DEBUG_TrialAllocSetAppDomain_NoScratchArea
@@ -503,7 +499,7 @@ endif ; _DEBUG
         ret
 
     AllocFailed:
-        mov     [M_GCLOCK], -1
+        mov     [g_global_alloc_lock], -1
         jmp     JIT_NEW
 LEAF_END JIT_TrialAllocSFastSP, _TEXT
 
@@ -520,11 +516,11 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT
 
         ; m_BaseSize is guaranteed to be a multiple of 8.
 
-        inc     [M_GCLOCK]
+        inc     [g_global_alloc_lock]
         jnz     JIT_Box
 
-        mov     rax, [generation_table + 0]     ; alloc_ptr
-        mov     r10, [generation_table + 8]     ; limit_ptr
+        mov     rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr]       ; alloc_ptr
+        mov     r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit]     ; limit_ptr
 
         add     r8, rax
 
@@ -532,9 +528,9 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT
         ja      NoAlloc
 
 
-        mov     qword ptr [generation_table + 0], r8     ; update the alloc ptr
+        mov     qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8     ; update the alloc ptr
         mov     [rax], rcx
-        mov     [M_GCLOCK], -1
+        mov     [g_global_alloc_lock], -1
 
 ifdef _DEBUG
         call    DEBUG_TrialAllocSetAppDomain_NoScratchArea
@@ -574,7 +570,7 @@ endif ; _DEBUG
         ret
 
     NoAlloc:
-        mov     [M_GCLOCK], -1
+        mov     [g_global_alloc_lock], -1
         jmp     JIT_Box
 NESTED_END JIT_BoxFastUP, _TEXT
 
@@ -602,20 +598,20 @@ LEAF_ENTRY AllocateStringFastUP, _TEXT
         lea     r8d, [r8d + ecx*2 + 7]
         and     r8d, -8
 
-        inc     [M_GCLOCK]
+        inc     [g_global_alloc_lock]
         jnz     FramedAllocateString
 
-        mov     rax, [generation_table + 0]     ; alloc_ptr
-        mov     r10, [generation_table + 8]     ; limit_ptr
+        mov     rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr]       ; alloc_ptr
+        mov     r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit]     ; limit_ptr
 
         add     r8, rax
 
         cmp     r8, r10
         ja      AllocFailed
 
-        mov     qword ptr [generation_table + 0], r8     ; update the alloc ptr
+        mov     qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8     ; update the alloc ptr
         mov     [rax], r11
-        mov     [M_GCLOCK], -1
+        mov     [g_global_alloc_lock], -1
 
         mov     [rax + OFFSETOF__StringObject__m_StringLength], ecx
 
@@ -626,7 +622,7 @@ endif ; _DEBUG
         ret
 
     AllocFailed:
-        mov     [M_GCLOCK], -1
+        mov     [g_global_alloc_lock], -1
         jmp     FramedAllocateString
 LEAF_END AllocateStringFastUP, _TEXT
 
@@ -668,11 +664,11 @@ LEAF_ENTRY JIT_NewArr1VC_UP, _TEXT
         add     r8d, 7
         and     r8d, -8
 
-        inc     [M_GCLOCK]
+        inc     [g_global_alloc_lock]
         jnz     JIT_NewArr1
 
-        mov     rax, [generation_table + 0]     ; alloc_ptr
-        mov     r10, [generation_table + 8]     ; limit_ptr
+        mov     rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr]       ; alloc_ptr
+        mov     r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit]     ; limit_ptr
 
         add     r8, rax
         jc      AllocFailed
@@ -680,9 +676,9 @@ LEAF_ENTRY JIT_NewArr1VC_UP, _TEXT
         cmp     r8, r10
         ja      AllocFailed
 
-        mov     qword ptr [generation_table + 0], r8     ; update the alloc ptr
+        mov     qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8     ; update the alloc ptr
         mov     [rax], r9
-        mov     [M_GCLOCK], -1
+        mov     [g_global_alloc_lock], -1
 
         mov     dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx
 
@@ -693,7 +689,7 @@ endif ; _DEBUG
         ret
 
     AllocFailed:
-        mov     [M_GCLOCK], -1
+        mov     [g_global_alloc_lock], -1
         jmp     JIT_NewArr1
 LEAF_END JIT_NewArr1VC_UP, _TEXT
 
@@ -731,20 +727,20 @@ LEAF_ENTRY JIT_NewArr1OBJ_UP, _TEXT
         ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed
         ; to be a multiple of 8.
 
-        inc     [M_GCLOCK]
+        inc     [g_global_alloc_lock]
         jnz     JIT_NewArr1
 
-        mov     rax, [generation_table + 0]     ; alloc_ptr
-        mov     r10, [generation_table + 8]     ; limit_ptr
+        mov     rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr]       ; alloc_ptr
+        mov     r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit]     ; limit_ptr
 
         add     r8, rax
 
         cmp     r8, r10
         ja      AllocFailed
 
-        mov     qword ptr [generation_table + 0], r8     ; update the alloc ptr
+        mov     qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8     ; update the alloc ptr
         mov     [rax], r9
-        mov     [M_GCLOCK], -1
+        mov     [g_global_alloc_lock], -1
 
         mov     dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx
 
@@ -755,7 +751,7 @@ endif ; _DEBUG
         ret
 
     AllocFailed:
-        mov     [M_GCLOCK], -1
+        mov     [g_global_alloc_lock], -1
 
     OversizedArray:
         jmp     JIT_NewArr1
diff --git a/src/vm/amd64/asmconstants.h b/src/vm/amd64/asmconstants.h
index 9c3b22d..e4f77de 100644
--- a/src/vm/amd64/asmconstants.h
+++ b/src/vm/amd64/asmconstants.h
@@ -165,6 +165,12 @@ ASMCONSTANTS_C_ASSERT(OFFSET__Thread__m_alloc_context__alloc_ptr == offsetof(Thr
 #define               OFFSET__Thread__m_alloc_context__alloc_limit 0x68
 ASMCONSTANTS_C_ASSERT(OFFSET__Thread__m_alloc_context__alloc_limit == offsetof(Thread, m_alloc_context) + offsetof(gc_alloc_context, alloc_limit));
 
+#define               OFFSETOF__gc_alloc_context__alloc_ptr 0x0
+ASMCONSTANT_OFFSETOF_ASSERT(gc_alloc_context, alloc_ptr);
+
+#define               OFFSETOF__gc_alloc_context__alloc_limit 0x8
+ASMCONSTANT_OFFSETOF_ASSERT(gc_alloc_context, alloc_limit);
+
 #define               OFFSETOF__ThreadExceptionState__m_pCurrentTracker 0x000
 ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadExceptionState__m_pCurrentTracker
                     == offsetof(ThreadExceptionState, m_pCurrentTracker));
diff --git a/src/vm/arm/stubs.cpp b/src/vm/arm/stubs.cpp
index c276d21..f1ba278 100644
--- a/src/vm/arm/stubs.cpp
+++ b/src/vm/arm/stubs.cpp
@@ -2641,7 +2641,7 @@ void InitJITHelpers1()
         ))
     {
 
-        _ASSERTE(GCHeapUtilities::UseAllocationContexts());
+        _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
         // If the TLS for Thread is low enough use the super-fast helpers
         if (gThreadTLSIndex < TLS_MINIMUM_AVAILABLE)
         {
diff --git a/src/vm/ceeload.cpp b/src/vm/ceeload.cpp
index 1979d69..710195d 100644
--- a/src/vm/ceeload.cpp
+++ b/src/vm/ceeload.cpp
@@ -14519,8 +14519,7 @@ void Module::ExpandAll()
 #include "clrvarargs.h" /* for VARARG C_ASSERTs in asmconstants.h */
 class CheckAsmOffsets
 {
-#define ASMCONSTANTS_C_ASSERT(cond) \
-        typedef char UNIQUE_LABEL(__C_ASSERT__)[(cond) ? 1 : -1];
+#define ASMCONSTANTS_C_ASSERT(cond) static_assert(cond, #cond);
 #include "asmconstants.h"
 };
 
diff --git a/src/vm/gcenv.ee.cpp b/src/vm/gcenv.ee.cpp
index baea98f..2833c99 100644
--- a/src/vm/gcenv.ee.cpp
+++ b/src/vm/gcenv.ee.cpp
@@ -725,10 +725,17 @@ void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* par
     }
     CONTRACTL_END;
 
-    Thread * pThread = NULL;
-    while ((pThread = ThreadStore::GetThreadList(pThread)) != NULL)
+    if (GCHeapUtilities::UseThreadAllocationContexts())
     {
-        fn(pThread->GetAllocContext(), param);
+        Thread * pThread = NULL;
+        while ((pThread = ThreadStore::GetThreadList(pThread)) != NULL)
+        {
+            fn(pThread->GetAllocContext(), param);
+        }
+    }
+    else
+    {
+        fn(&g_global_alloc_context, param);
     }
 }
 
@@ -1330,3 +1337,4 @@ void GCToEEInterface::EnableFinalization(bool foundFinalizers)
         FinalizerThread::EnableFinalization();
     }
 }
+
diff --git a/src/vm/gcenv.ee.h b/src/vm/gcenv.ee.h
index 9aa3e59..a7ab0b5 100644
--- a/src/vm/gcenv.ee.h
+++ b/src/vm/gcenv.ee.h
@@ -48,4 +48,4 @@ public:
 
 #endif // FEATURE_STANDALONE_GC
 
-#endif // _GCENV_EE_H_
\ No newline at end of file
+#endif // _GCENV_EE_H_
diff --git a/src/vm/gcheaputilities.cpp b/src/vm/gcheaputilities.cpp
index c34d07b..b260c3d 100644
--- a/src/vm/gcheaputilities.cpp
+++ b/src/vm/gcheaputilities.cpp
@@ -31,3 +31,6 @@ uint8_t* g_sw_ww_table = nullptr;
 bool g_sw_ww_enabled_for_gc_heap = false;
 
 #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
+gc_alloc_context g_global_alloc_context = {};
+
diff --git a/src/vm/gcheaputilities.h b/src/vm/gcheaputilities.h
index 4848114..d5ce46c 100644
--- a/src/vm/gcheaputilities.h
+++ b/src/vm/gcheaputilities.h
@@ -20,6 +20,12 @@ GPTR_DECL(uint32_t,g_card_table);
 }
 #endif // !DACCESS_COMPILE
 
+// For single-proc machines, the EE will use a single, shared alloc context
+// for all allocations. In order to avoid extra indirections in assembly
+// allocation helpers, the EE owns the global allocation context and the
+// GC will update it when it needs to.
+extern "C" gc_alloc_context g_global_alloc_context;
+
 extern "C" uint32_t* g_card_bundle_table;
 extern "C" uint8_t* g_ephemeral_low;
 extern "C" uint8_t* g_ephemeral_high;
@@ -100,22 +106,6 @@ public:
             GetGCHeap()->WaitUntilGCComplete(bConsiderGCStart);
     }
 
-    // Returns true if we should be using allocation contexts, false otherwise.
-    inline static bool UseAllocationContexts()
-    {
-        WRAPPER_NO_CONTRACT;
-#ifdef FEATURE_REDHAWK
-        // SIMPLIFY:  only use allocation contexts
-        return true;
-#else
-#if defined(_TARGET_ARM_) || defined(FEATURE_PAL)
-        return true;
-#else
-        return ((IsServerHeap() ? true : (g_SystemInfo.dwNumberOfProcessors >= 2)));
-#endif 
-#endif 
-    }
-
     // Returns true if the held GC heap is a Server GC heap, false otherwise.
     inline static bool IsServerHeap()
     {
@@ -128,6 +118,18 @@ public:
 #endif // FEATURE_SVR_GC
     }
 
+    static bool UseThreadAllocationContexts()
+    {
+        // When running on a single-proc system, it's more efficient to use a single global
+        // allocation context for SOH allocations than to use one for every thread.
+#if defined(_TARGET_ARM_) || defined(FEATURE_PAL) || defined(FEATURE_REDHAWK)
+        return true;
+#else
+        return IsServerHeap() || ::GetCurrentProcessCpuCount() != 1;
+#endif
+
+    }
+
 #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
 
     // Returns True if software write watch is currently enabled for the GC Heap,
@@ -192,7 +194,6 @@ public:
     }
 #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
 
-
 private:
     // This class should never be instantiated.
     GCHeapUtilities() = delete;
diff --git a/src/vm/gchelpers.cpp b/src/vm/gchelpers.cpp
index 6b3c2f6..258a862 100644
--- a/src/vm/gchelpers.cpp
+++ b/src/vm/gchelpers.cpp
@@ -54,11 +54,110 @@ inline gc_alloc_context* GetThreadAllocContext()
 {
     WRAPPER_NO_CONTRACT;
 
-    assert(GCHeapUtilities::UseAllocationContexts());
+    assert(GCHeapUtilities::UseThreadAllocationContexts());
 
     return & GetThread()->m_alloc_context;
 }
 
+// When not using per-thread allocation contexts, we (the EE) need to take care that
+// no two threads are concurrently modifying the global allocation context. This lock
+// must be acquired before any sort of operations involving the global allocation context
+// can occur.
+//
+// This lock is acquired by all allocations when not using per-thread allocation contexts.
+// It is acquired in two kinds of places:
+//   1) JIT_TrialAllocFastSP (and related assembly alloc helpers), which attempt to
+//      acquire it but move into an alloc slow path if acquiring fails
+//      (but does not decrement the lock variable when doing so)
+//   2) Alloc and AllocAlign8 in gchelpers.cpp, which acquire the lock using
+//      the Acquire and Release methods below.
+class GlobalAllocLock {
+    friend struct AsmOffsets;
+private:
+    // The lock variable. This field must always be first.
+    LONG m_lock;
+
+public:
+    // Creates a new GlobalAllocLock in the unlocked state.
+    GlobalAllocLock() : m_lock(-1) {}
+
+    // Copy and copy-assignment operators should never be invoked
+    // for this type
+    GlobalAllocLock(const GlobalAllocLock&) = delete;
+    GlobalAllocLock& operator=(const GlobalAllocLock&) = delete;
+
+    // Acquires the lock, spinning if necessary to do so. When this method
+    // returns, m_lock will be zero and the lock will be acquired.
+    void Acquire()
+    {
+        CONTRACTL {
+            NOTHROW;
+            GC_TRIGGERS; // switch to preemptive mode
+            MODE_COOPERATIVE;
+        } CONTRACTL_END;
+
+        DWORD spinCount = 0;
+        while(FastInterlockExchange(&m_lock, 0) != -1)
+        {
+            GCX_PREEMP();
+            __SwitchToThread(0, spinCount++);
+        }
+
+        assert(m_lock == 0);
+    }
+
+    // Releases the lock.
+    void Release()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        // the lock may not be exactly 0. This is because the
+        // assembly alloc routines increment the lock variable and
+        // jump if not zero to the slow alloc path, which eventually
+        // will try to acquire the lock again. At that point, it will
+        // spin in Acquire (since m_lock is some number that's not zero).
+        // When the thread that /does/ hold the lock releases it, the spinning
+        // thread will continue.
+        MemoryBarrier();
+        assert(m_lock >= 0);
+        m_lock = -1;
+    }
+
+    // Static helper to acquire a lock, for use with the Holder template.
+    static void AcquireLock(GlobalAllocLock *lock)
+    {
+        WRAPPER_NO_CONTRACT;
+        lock->Acquire();
+    }
+
+    // Static helper to release a lock, for use with the Holder template
+    static void ReleaseLock(GlobalAllocLock *lock)
+    {
+        WRAPPER_NO_CONTRACT;
+        lock->Release();
+    }
+
+    typedef Holder<GlobalAllocLock *, GlobalAllocLock::AcquireLock, GlobalAllocLock::ReleaseLock> Holder;
+};
+
+typedef GlobalAllocLock::Holder GlobalAllocLockHolder;
+
+struct AsmOffsets {
+    static_assert(offsetof(GlobalAllocLock, m_lock) == 0, "ASM code relies on this property");
+};
+
+// For single-proc machines, the global allocation context is protected
+// from concurrent modification by this lock.
+//
+// When not using per-thread allocation contexts, certain methods on IGCHeap
+// require that this lock be held before calling. These methods are documented
+// on the IGCHeap interface.
+extern "C"
+{
+    GlobalAllocLock g_global_alloc_lock;
+}
+
+
 // Checks to see if the given allocation size exceeds the
 // largest object size allowed - if it does, it throws
 // an OutOfMemoryException with a message indicating that
@@ -102,12 +201,12 @@ inline void CheckObjectSize(size_t alloc_size)
 //     * Call code:Alloc - When the jit helpers fall back, or we do allocations within the runtime code
 //         itself, we ultimately call here.
 //     * Call code:AllocLHeap - Used very rarely to force allocation to be on the large object heap.
-//     
+//
 // While this is a choke point into allocating an object, it is primitive (it does not want to know about
 // MethodTable and thus does not initialize that poitner. It also does not know if the object is finalizable
 // or contains pointers. Thus we quickly wrap this function in more user-friendly ones that know about
 // MethodTables etc. (see code:FastAllocatePrimitiveArray code:AllocateArrayEx code:AllocateObject)
-// 
+//
 // You can get an exhaustive list of code sites that allocate GC objects by finding all calls to
 // code:ProfilerObjectAllocatedCallback (since the profiler has to hook them all).
 inline Object* Alloc(size_t size, BOOL bFinalize, BOOL bContainsPointers )
@@ -137,10 +236,16 @@ inline Object* Alloc(size_t size, BOOL bFinalize, BOOL bContainsPointers )
     // We don't want to throw an SO during the GC, so make sure we have plenty
     // of stack before calling in.
     INTERIOR_STACK_PROBE_FOR(GetThread(), static_cast<unsigned>(DEFAULT_ENTRY_PROBE_AMOUNT * 1.5));
-    if (GCHeapUtilities::UseAllocationContexts())
+    if (GCHeapUtilities::UseThreadAllocationContexts())
+    {
         retVal = GCHeapUtilities::GetGCHeap()->Alloc(GetThreadAllocContext(), size, flags);
+    }
     else
-        retVal = GCHeapUtilities::GetGCHeap()->Alloc(size, flags);
+    {
+        GlobalAllocLockHolder holder(&g_global_alloc_lock);
+        retVal = GCHeapUtilities::GetGCHeap()->Alloc(&g_global_alloc_context, size, flags);
+    }
+
 
     if (!retVal)
     {
@@ -172,10 +277,15 @@ inline Object* AllocAlign8(size_t size, BOOL bFinalize, BOOL bContainsPointers,
     // We don't want to throw an SO during the GC, so make sure we have plenty
     // of stack before calling in.
     INTERIOR_STACK_PROBE_FOR(GetThread(), static_cast<unsigned>(DEFAULT_ENTRY_PROBE_AMOUNT * 1.5));
-    if (GCHeapUtilities::UseAllocationContexts())
+    if (GCHeapUtilities::UseThreadAllocationContexts())
+    {
         retVal = GCHeapUtilities::GetGCHeap()->AllocAlign8(GetThreadAllocContext(), size, flags);
+    }
     else
-        retVal = GCHeapUtilities::GetGCHeap()->AllocAlign8(size, flags);
+    {
+        GlobalAllocLockHolder holder(&g_global_alloc_lock);
+        retVal = GCHeapUtilities::GetGCHeap()->AllocAlign8(&g_global_alloc_context, size, flags);
+    }
 
     if (!retVal)
     {
diff --git a/src/vm/i386/jitinterfacex86.cpp b/src/vm/i386/jitinterfacex86.cpp
index c1769eb..1d8a6ba 100644
--- a/src/vm/i386/jitinterfacex86.cpp
+++ b/src/vm/i386/jitinterfacex86.cpp
@@ -34,8 +34,7 @@
 #define MON_DEBUG 1
 #endif
 
-class generation;
-extern "C" generation generation_table[];
+extern "C" LONG g_global_alloc_lock;
 
 extern "C" void STDCALL JIT_WriteBarrierReg_PreGrow();// JIThelp.asm/JIThelp.s
 extern "C" void STDCALL JIT_WriteBarrierReg_PostGrow();// JIThelp.asm/JIThelp.s
@@ -562,9 +561,9 @@ void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel *
     else
     {
         // Take the GC lock (there is no lock prefix required - we will use JIT_TrialAllocSFastMP on an MP System).
-        // inc             dword ptr [m_GCLock]
+        // inc             dword ptr [g_global_alloc_lock]
         psl->Emit16(0x05ff);
-        psl->Emit32((int)(size_t)&m_GCLock);
+        psl->Emit32((int)(size_t)&g_global_alloc_lock);
 
         // jnz             NoLock
         psl->X86EmitCondJump(noLock, X86CondCode::kJNZ);
@@ -580,9 +579,9 @@ void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel *
             psl->X86EmitIndexRegLoad(kEDX, kECX, offsetof(MethodTable, m_BaseSize));
         }
 
-        // mov             eax, dword ptr [generation_table]
+        // mov             eax, dword ptr [g_global_alloc_context]
         psl->Emit8(0xA1);
-        psl->Emit32((int)(size_t)&generation_table);
+        psl->Emit32((int)(size_t)&g_global_alloc_context);
 
         // Try the allocation.
         // add             edx, eax
@@ -591,17 +590,17 @@ void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel *
         if (flags & (ALIGN8 | ALIGN8OBJ))
             EmitAlignmentRoundup(psl, kEAX, kEDX, flags);      // bump up EDX size by 12 if EAX unaligned (so that we are aligned)
 
-        // cmp             edx, dword ptr [generation_table+4]
+        // cmp             edx, dword ptr [g_global_alloc_context+4]
         psl->Emit16(0x153b);
-        psl->Emit32((int)(size_t)&generation_table + 4);
+        psl->Emit32((int)(size_t)&g_global_alloc_context + 4);
 
         // ja              noAlloc
         psl->X86EmitCondJump(noAlloc, X86CondCode::kJA);
 
         // Fill in the allocation and get out.
-        // mov             dword ptr [generation_table], edx
+        // mov             dword ptr [g_global_alloc_context], edx
         psl->Emit16(0x1589);
-        psl->Emit32((int)(size_t)&generation_table);
+        psl->Emit32((int)(size_t)&g_global_alloc_context);
 
         if (flags & (ALIGN8 | ALIGN8OBJ))
             EmitDummyObject(psl, kEAX, flags);
@@ -609,9 +608,9 @@ void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel *
         // mov             dword ptr [eax], ecx
         psl->X86EmitIndexRegStore(kEAX, 0, kECX);
 
-        // mov             dword ptr [m_GCLock], 0FFFFFFFFh
+        // mov             dword ptr [g_global_alloc_lock], 0FFFFFFFFh
         psl->Emit16(0x05C7);
-        psl->Emit32((int)(size_t)&m_GCLock);
+        psl->Emit32((int)(size_t)&g_global_alloc_lock);
         psl->Emit32(0xFFFFFFFF);
     }
 
@@ -667,9 +666,9 @@ void JIT_TrialAlloc::EmitNoAllocCode(CPUSTUBLINKER *psl, Flags flags)
     }
     else
     {
-        // mov             dword ptr [m_GCLock], 0FFFFFFFFh
+        // mov             dword ptr [g_global_alloc_lock], 0FFFFFFFFh
         psl->Emit16(0x05c7);
-        psl->Emit32((int)(size_t)&m_GCLock);
+        psl->Emit32((int)(size_t)&g_global_alloc_lock);
         psl->Emit32(0xFFFFFFFF);
     }
 }
@@ -1427,7 +1426,7 @@ void InitJITHelpers1()
 
     _ASSERTE(g_SystemInfo.dwNumberOfProcessors != 0);
 
-    JIT_TrialAlloc::Flags flags = GCHeapUtilities::UseAllocationContexts() ?
+    JIT_TrialAlloc::Flags flags = GCHeapUtilities::UseThreadAllocationContexts() ?
         JIT_TrialAlloc::MP_ALLOCATOR : JIT_TrialAlloc::NORMAL;
 
     // Get CPU features and check for SSE2 support.
diff --git a/src/vm/jithelpers.cpp b/src/vm/jithelpers.cpp
index b46ac98..aaab589 100644
--- a/src/vm/jithelpers.cpp
+++ b/src/vm/jithelpers.cpp
@@ -2752,7 +2752,7 @@ HCIMPL1(Object*, JIT_NewS_MP_FastPortable, CORINFO_CLASS_HANDLE typeHnd_)
 
     do
     {
-        _ASSERTE(GCHeapUtilities::UseAllocationContexts());
+        _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
 
         // This is typically the only call in the fast path. Making the call early seems to be better, as it allows the compiler
         // to use volatile registers for intermediate values. This reduces the number of push/pop instructions and eliminates
@@ -2844,7 +2844,7 @@ HCIMPL1(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength)
 
     do
     {
-        _ASSERTE(GCHeapUtilities::UseAllocationContexts());
+        _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
 
         // Instead of doing elaborate overflow checks, we just limit the number of elements. This will avoid all overflow
         // problems, as well as making sure big string objects are correctly allocated in the big object heap.
@@ -3008,7 +3008,7 @@ HCIMPL2(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayTypeHn
 
     do
     {
-        _ASSERTE(GCHeapUtilities::UseAllocationContexts());
+        _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
 
         // Do a conservative check here.  This is to avoid overflow while doing the calculations.  We don't
         // have to worry about "large" objects, since the allocation quantum is never big enough for
@@ -3085,7 +3085,7 @@ HCIMPL2(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayTypeH
 
     do
     {
-        _ASSERTE(GCHeapUtilities::UseAllocationContexts());
+        _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
 
         // Make sure that the total size cannot reach LARGE_OBJECT_SIZE, which also allows us to avoid overflow checks. The
         // "256" slack is to cover the array header size and round-up, using a constant value here out of laziness.
diff --git a/src/vm/jitinterfacegen.cpp b/src/vm/jitinterfacegen.cpp
index 99e03f4..ce4c1e9 100644
--- a/src/vm/jitinterfacegen.cpp
+++ b/src/vm/jitinterfacegen.cpp
@@ -218,7 +218,7 @@ void InitJITHelpers1()
         ))
     {
         // if (multi-proc || server GC)
-        if (GCHeapUtilities::UseAllocationContexts())
+        if (GCHeapUtilities::UseThreadAllocationContexts())
         {
 #ifdef FEATURE_IMPLICIT_TLS
             SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
-- 
2.7.4