Optimize LOS for better parallelization. (mono/mono#17173)
author Johan Lorensson <lateralusx.github@gmail.com>
Wed, 23 Oct 2019 18:58:04 +0000 (20:58 +0200)
committer Aleksey Kliger (λgeek) <alklig@microsoft.com>
Wed, 23 Oct 2019 18:58:04 +0000 (14:58 -0400)
Scanning the LOS list as part of a minor GC didn't parallelize well, since
every scan job needed to walk the complete LOS list. Walking the list touched
a lot of memory, so minor GC pause times grew as the number of items in the
LOS list increased. As an example, stressing the LOS data structure with
~600 MB of randomly sized byte arrays gave minor GC pause times of ~20ms
without parallelization and ~19ms with parallelization. NOTE: the more work
each job gets (more memory to scan), the better the parallelization becomes,
but there is still a lot of room for improvement.

Changing from a plain linked list to a SgenArrayList makes it more effective
to parallelize the scan jobs, and since the list uses sequential memory, this
also reduces cache misses for each thread iterating LOS objects. Tagging
objects in the SgenArrayList with reference information reduces cache misses
dramatically in cases where objects without references exist on the list,
since scanning jobs can skip these items without any need to touch object
memory (which would cause cache misses).

The same scenario running with the fix gives minor GC pause times of ~7ms
without parallelization (down from ~20ms) and < 5ms with parallelization
(down from ~19ms). NOTE: the more work each job gets (more memory to scan),
the better the result from parallelization, so the delta between using and
not using parallelization will increase; this scenario primarily stresses
the extreme case where most LOS objects don't have references.

In total, this optimization reduces the minor GC pause time by about 4x, from
~19ms when using parallelization down to < 5ms.

Commit migrated from https://github.com/mono/mono/commit/df30c7228c4061573601ea9a71772cc6f00e4b85

src/mono/mono/metadata/sgen-mono.c
src/mono/mono/sgen/sgen-array-list.h
src/mono/mono/sgen/sgen-debug.c
src/mono/mono/sgen/sgen-gc.c
src/mono/mono/sgen/sgen-gc.h
src/mono/mono/sgen/sgen-los.c

index c0d2788..d949bfd 100644 (file)
@@ -811,6 +811,18 @@ clear_domain_free_major_pinned_object_callback (GCObject *obj, size_t size, Mono
 }
 
 static void
+clear_domain_process_los_object_callback (GCObject *obj, size_t size, MonoDomain *domain)
+{
+       clear_domain_process_object (obj, domain);
+}
+
+static gboolean
+clear_domain_free_los_object_callback (GCObject *obj, size_t size, MonoDomain *domain)
+{
+       return need_remove_object_for_domain (obj, domain);
+}
+
+static void
 sgen_finish_concurrent_work (const char *reason, gboolean stw)
 {
        if (sgen_get_concurrent_collection_in_progress ())
@@ -876,25 +888,10 @@ mono_gc_clear_domain (MonoDomain * domain)
           dereference a pointer from an object to another object if
           the first object is a proxy. */
        sgen_major_collector.iterate_objects (ITERATE_OBJECTS_SWEEP_ALL, (IterateObjectCallbackFunc)clear_domain_process_major_object_callback, domain);
-       for (bigobj = sgen_los_object_list; bigobj; bigobj = bigobj->next)
-               clear_domain_process_object ((GCObject*)bigobj->data, domain);
 
-       prev = NULL;
-       for (bigobj = sgen_los_object_list; bigobj;) {
-               if (need_remove_object_for_domain ((GCObject*)bigobj->data, domain)) {
-                       LOSObject *to_free = bigobj;
-                       if (prev)
-                               prev->next = bigobj->next;
-                       else
-                               sgen_los_object_list = bigobj->next;
-                       bigobj = bigobj->next;
-                       SGEN_LOG (4, "Freeing large object %p", bigobj->data);
-                       sgen_los_free_object (to_free);
-                       continue;
-               }
-               prev = bigobj;
-               bigobj = bigobj->next;
-       }
+       sgen_los_iterate_objects ((IterateObjectCallbackFunc)clear_domain_process_los_object_callback, domain);
+       sgen_los_iterate_objects_free ((IterateObjectResultCallbackFunc)clear_domain_free_los_object_callback, domain);
+
        sgen_major_collector.iterate_objects (ITERATE_OBJECTS_SWEEP_NON_PINNED, (IterateObjectCallbackFunc)clear_domain_free_major_non_pinned_object_callback, domain);
        sgen_major_collector.iterate_objects (ITERATE_OBJECTS_SWEEP_PINNED, (IterateObjectCallbackFunc)clear_domain_free_major_pinned_object_callback, domain);
 
index 8644f5a..d98e678 100644 (file)
@@ -49,24 +49,46 @@ typedef struct {
        int mem_type; /* sgen internal mem type or -1 for malloc allocation */
 } SgenArrayList;
 
-/*
- * Computes floor(log2(index + MIN_BUCKET_SIZE)) - 1, giving the index
- * of the bucket containing a slot.
- */
+#if defined(__GNUC__)
 static inline guint32
-sgen_array_list_index_bucket (guint32 index)
+sgen_clz (guint32 x)
+{
+       return __builtin_clz (x);
+}
+#elif !defined(ENABLE_MSVC_LZCNT) && defined(_MSC_VER)
+static inline guint32
+sgen_clz (guint32 x)
+{
+       gulong leading_zero_bits;
+       return _BitScanReverse (&leading_zero_bits, (gulong)x) ? 31 - leading_zero_bits : 32;
+}
+#elif defined(ENABLE_MSVC_LZCNT) && defined(_MSC_VER)
+static inline guint32
+sgen_clz (guint32 x)
 {
-#ifdef __GNUC__
-       return CHAR_BIT * sizeof (index) - __builtin_clz (index + SGEN_ARRAY_LIST_MIN_BUCKET_SIZE) - 1 - SGEN_ARRAY_LIST_MIN_BUCKET_BITS;
+       return __lzcnt (x);
+}
 #else
+static inline guint32
+sgen_clz (guint32 x)
+{
        guint count = 0;
-       index += SGEN_ARRAY_LIST_MIN_BUCKET_SIZE;
-       while (index) {
+       while (x) {
                ++count;
-               index >>= 1;
+               x >>= 1;
        }
-       return count - 1 - SGEN_ARRAY_LIST_MIN_BUCKET_BITS;
+       return 32 - count;
+}
 #endif
+
+/*
+ * Computes floor(log2(index + MIN_BUCKET_SIZE)) - 1, giving the index
+ * of the bucket containing a slot.
+ */
+static inline guint32
+sgen_array_list_index_bucket (guint32 index)
+{
+       return CHAR_BIT * sizeof (index) - sgen_clz (index + SGEN_ARRAY_LIST_MIN_BUCKET_SIZE) - 1 - SGEN_ARRAY_LIST_MIN_BUCKET_BITS;
 }
 
 static inline guint32
index 230cbee..0d27180 100644 (file)
@@ -1024,15 +1024,12 @@ scan_object_for_xdomain_refs (GCObject *obj, mword size, void *data)
 void
 sgen_check_for_xdomain_refs (void)
 {
-       LOSObject *bigobj;
-
        sgen_scan_area_with_callback (sgen_nursery_section->data, sgen_nursery_section->end_data,
                        (IterateObjectCallbackFunc)scan_object_for_xdomain_refs, NULL, FALSE, TRUE);
 
        sgen_major_collector.iterate_objects (ITERATE_OBJECTS_SWEEP_ALL, (IterateObjectCallbackFunc)scan_object_for_xdomain_refs, NULL);
 
-       for (bigobj = sgen_los_object_list; bigobj; bigobj = bigobj->next)
-               scan_object_for_xdomain_refs ((GCObject*)bigobj->data, sgen_los_object_size (bigobj), NULL);
+       sgen_los_iterate_objects ((IterateObjectCallbackFunc)scan_object_for_xdomain_refs, NULL);
 }
 
 #endif
@@ -1131,6 +1128,12 @@ dump_object (GCObject *obj, gboolean dump_location)
 #endif
 }
 
+/*
+ * Adapter that lets dump_object () be driven by sgen_los_iterate_objects ().
+ *
+ * The signature matches IterateObjectCallbackFunc exactly, so the function is
+ * never called through an incompatible function-pointer type.
+ *
+ * obj:           the object to dump.
+ * size:          object size supplied by the iterator; unused here.
+ * dump_location: boolean flag smuggled through the user-data pointer;
+ *                NULL (i.e. (void*)FALSE) means FALSE, anything else TRUE.
+ */
+static void
+dump_object_callback (GCObject *obj, size_t size, void *dump_location)
+{
+       dump_object (obj, dump_location != NULL);
+}
+
 void
 sgen_debug_enable_heap_dump (const char *filename)
 {
@@ -1145,7 +1148,6 @@ void
 sgen_debug_dump_heap (const char *type, int num, const char *reason)
 {
        SgenPointerQueue *pinned_objects;
-       LOSObject *bigobj;
        int i;
 
        if (!heap_dump_file)
@@ -1174,8 +1176,7 @@ sgen_debug_dump_heap (const char *type, int num, const char *reason)
        sgen_major_collector.dump_heap (heap_dump_file);
 
        fprintf (heap_dump_file, "<los>\n");
-       for (bigobj = sgen_los_object_list; bigobj; bigobj = bigobj->next)
-               dump_object ((GCObject*)bigobj->data, FALSE);
+       sgen_los_iterate_objects ((IterateObjectCallbackFunc)dump_object_callback, (void*)FALSE);
        fprintf (heap_dump_file, "</los>\n");
 
        fprintf (heap_dump_file, "</collection>\n");
index 4dcfbee..7b5f7d8 100644 (file)
@@ -1915,7 +1915,6 @@ typedef enum {
 static void
 major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_next_pin_slot, CopyOrMarkFromRootsMode mode, SgenObjectOperations *object_ops_nopar, SgenObjectOperations *object_ops_par)
 {
-       LOSObject *bigobj;
        TV_DECLARE (atv);
        TV_DECLARE (btv);
        /* FIXME: only use these values for the precise scan
@@ -1999,26 +1998,7 @@ major_copy_or_mark_from_roots (SgenGrayQueue *gc_thread_gray_queue, size_t *old_
        sgen_find_section_pin_queue_start_end (sgen_nursery_section);
        /* identify possible pointers to the insize of large objects */
        SGEN_LOG (6, "Pinning from large objects");
-       for (bigobj = sgen_los_object_list; bigobj; bigobj = bigobj->next) {
-               size_t dummy;
-               if (sgen_find_optimized_pin_queue_area ((char*)bigobj->data, (char*)bigobj->data + sgen_los_object_size (bigobj), &dummy, &dummy)) {
-                       sgen_binary_protocol_pin (bigobj->data, (gpointer)LOAD_VTABLE (bigobj->data), safe_object_get_size (bigobj->data));
-
-                       if (sgen_los_object_is_pinned (bigobj->data)) {
-                               SGEN_ASSERT (0, mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT, "LOS objects can only be pinned here after concurrent marking.");
-                               continue;
-                       }
-                       sgen_los_pin_object (bigobj->data);
-                       if (SGEN_OBJECT_HAS_REFERENCES (bigobj->data))
-                               GRAY_OBJECT_ENQUEUE_SERIAL (gc_thread_gray_queue, bigobj->data, sgen_obj_get_descriptor ((GCObject*)bigobj->data));
-                       sgen_pin_stats_register_object (bigobj->data, GENERATION_OLD);
-                       SGEN_LOG (6, "Marked large object %p (%s) size: %lu from roots", bigobj->data,
-                                       sgen_client_vtable_get_name (SGEN_LOAD_VTABLE (bigobj->data)),
-                                       (unsigned long)sgen_los_object_size (bigobj));
-
-                       sgen_client_pinned_los_object (bigobj->data);
-               }
-       }
+       sgen_los_pin_objects (gc_thread_gray_queue, mode == COPY_OR_MARK_FROM_ROOTS_FINISH_CONCURRENT);
 
        pin_objects_in_nursery (mode == COPY_OR_MARK_FROM_ROOTS_START_CONCURRENT, ctx);
 
index 313c28e..34ee230 100644 (file)
@@ -401,6 +401,7 @@ void sgen_deregister_root (char* addr)
        MONO_PERMIT (need (sgen_lock_gc));
 
 typedef void (*IterateObjectCallbackFunc) (GCObject*, size_t, void*);
+typedef gboolean (*IterateObjectResultCallbackFunc) (GCObject*, size_t, void*);
 
 void sgen_scan_area_with_callback (char *start, char *end, IterateObjectCallbackFunc callback, void *data, gboolean allow_flags, gboolean fail_on_canaries);
 
@@ -900,16 +901,11 @@ gboolean sgen_set_allow_synchronous_major (gboolean flag);
 
 typedef struct _LOSObject LOSObject;
 struct _LOSObject {
-       LOSObject *next;
        mword size; /* this is the object size, lowest bit used for pin/mark */
        guint8 * volatile cardtable_mod_union; /* only used by the concurrent collector */
-#if SIZEOF_VOID_P < 8
-       mword dummy;            /* to align object to sizeof (double) */
-#endif
        GCObject data [MONO_ZERO_LEN_ARRAY];
 };
 
-extern LOSObject *sgen_los_object_list;
 extern mword sgen_los_memory_usage;
 extern mword sgen_los_memory_usage_total;
 
@@ -919,6 +915,7 @@ void* sgen_los_alloc_large_inner (GCVTable vtable, size_t size)
 void sgen_los_sweep (void);
 gboolean sgen_ptr_is_in_los (char *ptr, char **start);
 void sgen_los_iterate_objects (IterateObjectCallbackFunc cb, void *user_data);
+void sgen_los_iterate_objects_free (IterateObjectResultCallbackFunc cb, void *user_data);
 void sgen_los_iterate_live_block_ranges (sgen_cardtable_block_callback callback);
 void sgen_los_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count);
 void sgen_los_update_cardtable_mod_union (void);
@@ -928,6 +925,7 @@ gboolean mono_sgen_los_describe_pointer (char *ptr);
 LOSObject* sgen_los_header_for_object (GCObject *data);
 mword sgen_los_object_size (LOSObject *obj);
 void sgen_los_pin_object (GCObject *obj);
+void sgen_los_pin_objects (SgenGrayQueue *gray_queue, gboolean finish_concurrent_mode);
 gboolean sgen_los_pin_object_par (GCObject *obj);
 gboolean sgen_los_object_is_pinned (GCObject *obj);
 void sgen_los_mark_mod_union_card (GCObject *mono_obj, void **ptr);
index c030b1a..45c8ad2 100644 (file)
@@ -30,6 +30,8 @@
 #include "mono/sgen/sgen-cardtable.h"
 #include "mono/sgen/sgen-memory-governor.h"
 #include "mono/sgen/sgen-client.h"
+#include "mono/sgen/sgen-array-list.h"
+#include "mono/sgen/sgen-pinning.h"
 
 #define LOS_SECTION_SIZE       (1024 * 1024)
 
@@ -63,8 +65,61 @@ struct _LOSSection {
        unsigned char *free_chunk_map;
 };
 
-/* We allow read only access on the list while sweep is not running */
-LOSObject *sgen_los_object_list = NULL;
+/* We allow read only access on the array list while sweep is not running */
+static SgenArrayList sgen_los_object_array_list = SGEN_ARRAY_LIST_INIT (NULL, sgen_array_list_default_is_slot_set, NULL, INTERNAL_MEM_PIN_QUEUE);
+static gboolean compact_los_objects = FALSE;
+
+#define LOS_OBJECT_IS_TAGGED_HAS_REFERENCES(bl) SGEN_POINTER_IS_TAGGED_1 ((bl))
+#define LOS_OBJECT_TAG_HAS_REFERENCES(bl) SGEN_POINTER_TAG_1 ((bl))
+
+#define LOS_OBJECT_UNTAG(bl) ((LOSObject *)SGEN_POINTER_UNTAG_1 ((bl)))
+
+#define LOS_OBJECT_TAG(bl) (SGEN_OBJECT_HAS_REFERENCES((bl)->data) ? LOS_OBJECT_TAG_HAS_REFERENCES ((bl)) : (bl))
+
+#define FOREACH_LOS_OBJECT_NO_LOCK(bl) { \
+       volatile gpointer *slot; \
+       SGEN_ARRAY_LIST_FOREACH_SLOT (&sgen_los_object_array_list, slot) { \
+               (bl) = LOS_OBJECT_UNTAG (*slot); \
+               if (!(bl)) \
+                       continue;
+
+#define FREE_CURRENT_LOS_OBJECT_NO_LOCK(bl) \
+       *slot = NULL; \
+       sgen_los_free_object (bl);
+
+#define END_FOREACH_LOS_OBJECT_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT; }
+
+#define FOREACH_LOS_OBJECT_HAS_REFERENCES_NO_LOCK(bl,hr) { \
+       volatile gpointer *slot; \
+       SGEN_ARRAY_LIST_FOREACH_SLOT (&sgen_los_object_array_list, slot) { \
+               (bl) = (LOSObject *) (*slot); \
+               if (!(bl)) \
+                       continue; \
+               (hr) = LOS_OBJECT_IS_TAGGED_HAS_REFERENCES ((bl)); \
+               (bl) = LOS_OBJECT_UNTAG ((bl));
+
+#define END_FOREACH_LOS_OBJECT_HAS_REFERENCES_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT; }
+
+#define FOREACH_LOS_OBJECT_RANGE_NO_LOCK(bl,begin,end,index) { \
+       volatile gpointer *slot; \
+       SGEN_ARRAY_LIST_FOREACH_SLOT_RANGE (&sgen_los_object_array_list, begin, end, slot, index) { \
+               (bl) = LOS_OBJECT_UNTAG (*slot); \
+               if (!(bl)) \
+                       continue;
+
+#define END_FOREACH_LOS_OBJECT_RANGE_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT_RANGE; }
+
+#define FOREACH_LOS_OBJECT_RANGE_HAS_REFERENCES_NO_LOCK(bl,begin,end,index,hr) { \
+       volatile gpointer *slot; \
+       SGEN_ARRAY_LIST_FOREACH_SLOT_RANGE (&sgen_los_object_array_list, begin, end, slot, index) { \
+               (bl) = (LOSObject *) (*slot); \
+               if (!(bl)) \
+                       continue; \
+               (hr) = LOS_OBJECT_IS_TAGGED_HAS_REFERENCES ((bl)); \
+               (bl) = LOS_OBJECT_UNTAG ((bl));
+
+#define END_FOREACH_LOS_OBJECT_RANGE_HAS_REFERENCES_NO_LOCK } SGEN_ARRAY_LIST_END_FOREACH_SLOT_RANGE; }
+
 /* Memory used by LOS objects */
 mword sgen_los_memory_usage = 0;
 /* Total memory used by the LOS allocator */
@@ -101,7 +156,7 @@ los_consistency_check (void)
        int i;
        mword memory_usage = 0;
 
-       for (obj = sgen_los_object_list; obj; obj = obj->next) {
+       FOREACH_LOS_OBJECT_NO_LOCK (obj) {
                mword obj_size = sgen_los_object_size (obj);
                char *end = obj->data + obj_size;
                int start_index, num_chunks;
@@ -119,7 +174,7 @@ los_consistency_check (void)
                num_chunks = (obj_size + sizeof (LOSObject) + LOS_CHUNK_SIZE - 1) >> LOS_CHUNK_BITS;
                for (i = start_index; i < start_index + num_chunks; ++i)
                        g_assert (!section->free_chunk_map [i]);
-       }
+       } END_FOREACH_LOS_OBJECT_NO_LOCK;
 
        for (i = 0; i < LOS_NUM_FAST_SIZES; ++i) {
                LOSFreeChunks *size_chunks;
@@ -408,13 +463,13 @@ sgen_los_alloc_large_inner (GCVTable vtable, size_t size)
        vtslot = (void**)obj->data;
        *vtslot = vtable;
        sgen_update_heap_boundaries ((mword)obj->data, (mword)obj->data + size);
-       obj->next = sgen_los_object_list;
+
        /*
-        * We need a memory barrier so we don't expose as head of the los object list
-        * a LOSObject that doesn't have its fields initialized.
+        * We need a memory barrier so we don't expose a LOSObject
+        * that doesn't have its fields initialized.
         */
        mono_memory_write_barrier ();
-       sgen_los_object_list = obj;
+       sgen_array_list_add (&sgen_los_object_array_list, LOS_OBJECT_TAG (obj), 0, FALSE);
        sgen_los_memory_usage += size;
        los_num_objects++;
        SGEN_LOG (4, "Allocated large object %p, vtable: %p (%s), size: %zd", obj->data, vtable, sgen_client_vtable_get_name (vtable), size);
@@ -432,39 +487,35 @@ static void sgen_los_unpin_object (GCObject *data);
 void
 sgen_los_sweep (void)
 {
-       LOSObject *bigobj, *prevbo;
+       LOSObject *obj;
        LOSSection *section, *prev;
        int i;
        int num_sections = 0;
 
        /* sweep the big objects list */
-       prevbo = NULL;
-       for (bigobj = sgen_los_object_list; bigobj;) {
-               SGEN_ASSERT (0, !SGEN_OBJECT_IS_PINNED (bigobj->data), "Who pinned a LOS object?");
-
-               if (sgen_los_object_is_pinned (bigobj->data)) {
-                       if (bigobj->cardtable_mod_union) {
-                               mword obj_size = sgen_los_object_size (bigobj);
-                               mword num_cards = sgen_card_table_number_of_cards_in_range ((mword) bigobj->data, obj_size);
-                               memset (bigobj->cardtable_mod_union, 0, num_cards);
+       FOREACH_LOS_OBJECT_NO_LOCK (obj) {
+               SGEN_ASSERT (0, !SGEN_OBJECT_IS_PINNED (obj->data), "Who pinned a LOS object?");
+
+               if (sgen_los_object_is_pinned (obj->data)) {
+                       if (obj->cardtable_mod_union) {
+                               mword obj_size = sgen_los_object_size (obj);
+                               mword num_cards = sgen_card_table_number_of_cards_in_range ((mword) obj->data, obj_size);
+                               memset (obj->cardtable_mod_union, 0, num_cards);
                        }
 
-                       sgen_los_unpin_object (bigobj->data);
-                       sgen_update_heap_boundaries ((mword)bigobj->data, (mword)bigobj->data + sgen_los_object_size (bigobj));
+                       sgen_los_unpin_object (obj->data);
+                       sgen_update_heap_boundaries ((mword)obj->data, (mword)obj->data + sgen_los_object_size (obj));
                } else {
-                       LOSObject *to_free;
-                       /* not referenced anywhere, so we can free it */
-                       if (prevbo)
-                               prevbo->next = bigobj->next;
-                       else
-                               sgen_los_object_list = bigobj->next;
-                       to_free = bigobj;
-                       bigobj = bigobj->next;
-                       sgen_los_free_object (to_free);
+                       FREE_CURRENT_LOS_OBJECT_NO_LOCK (obj);
+                       compact_los_objects = TRUE;
                        continue;
                }
-               prevbo = bigobj;
-               bigobj = bigobj->next;
+       } END_FOREACH_LOS_OBJECT_NO_LOCK;
+
+       /* Try to compact list, if needed. */
+       if (compact_los_objects) {
+               sgen_array_list_remove_nulls (&sgen_los_object_array_list);
+               compact_los_objects = FALSE;
        }
 
        /* Try to free memory */
@@ -529,7 +580,7 @@ sgen_ptr_is_in_los (char *ptr, char **start)
 
        if (start)
                *start = NULL;
-       for (obj = sgen_los_object_list; obj; obj = obj->next) {
+       FOREACH_LOS_OBJECT_NO_LOCK (obj) {
                char *end = (char*)obj->data + sgen_los_object_size (obj);
 
                if (ptr >= (char*)obj->data && ptr < end) {
@@ -537,7 +588,7 @@ sgen_ptr_is_in_los (char *ptr, char **start)
                                *start = (char*)obj->data;
                        return TRUE;
                }
-       }
+       } END_FOREACH_LOS_OBJECT_NO_LOCK;
        return FALSE;
 }
 
@@ -546,8 +597,23 @@ sgen_los_iterate_objects (IterateObjectCallbackFunc cb, void *user_data)
 {
        LOSObject *obj;
 
-       for (obj = sgen_los_object_list; obj; obj = obj->next)
+       FOREACH_LOS_OBJECT_NO_LOCK (obj) {
                cb (obj->data, sgen_los_object_size (obj), user_data);
+       } END_FOREACH_LOS_OBJECT_NO_LOCK;
+}
+
+/*
+ * Walks every LOS object and frees the ones the callback selects.
+ *
+ * cb:        invoked with (object, object size, user_data); a TRUE result
+ *            means the object is freed and its list slot is cleared.
+ * user_data: passed to cb unchanged.
+ *
+ * Freed slots are only set to NULL here; compact_los_objects is raised so
+ * that sgen_los_sweep () removes the holes later.
+ */
+void
+sgen_los_iterate_objects_free (IterateObjectResultCallbackFunc cb, void *user_data)
+{
+       LOSObject *los_obj;
+
+       FOREACH_LOS_OBJECT_NO_LOCK (los_obj) {
+               gboolean should_free = cb (los_obj->data, sgen_los_object_size (los_obj), user_data);
+
+               /* Keep objects the callback did not select. */
+               if (!should_free)
+                       continue;
+
+               SGEN_LOG (4, "Freeing large object %p", los_obj->data);
+               FREE_CURRENT_LOS_OBJECT_NO_LOCK (los_obj);
+               compact_los_objects = TRUE;
+       } END_FOREACH_LOS_OBJECT_NO_LOCK;
+}
 
 gboolean
@@ -555,10 +621,10 @@ sgen_los_is_valid_object (char *object)
 {
        LOSObject *obj;
 
-       for (obj = sgen_los_object_list; obj; obj = obj->next) {
+       FOREACH_LOS_OBJECT_NO_LOCK (obj) {
                if ((char*)obj->data == object)
                        return TRUE;
-       }
+       } END_FOREACH_LOS_OBJECT_NO_LOCK;
        return FALSE;
 }
 
@@ -567,7 +633,7 @@ mono_sgen_los_describe_pointer (char *ptr)
 {
        LOSObject *obj;
 
-       for (obj = sgen_los_object_list; obj; obj = obj->next) {
+       FOREACH_LOS_OBJECT_NO_LOCK (obj) {
                const char *los_kind;
                mword size;
                gboolean pinned;
@@ -591,7 +657,7 @@ mono_sgen_los_describe_pointer (char *ptr)
                }
 
                return TRUE;
-       }
+       } END_FOREACH_LOS_OBJECT_NO_LOCK;
        return FALSE;
 }
 
@@ -599,11 +665,11 @@ void
 sgen_los_iterate_live_block_ranges (sgen_cardtable_block_callback callback)
 {
        LOSObject *obj;
-       for (obj = sgen_los_object_list; obj; obj = obj->next) {
-               GCVTable vt = SGEN_LOAD_VTABLE (obj->data);
-               if (SGEN_VTABLE_HAS_REFERENCES (vt))
+       gboolean has_references;
+       FOREACH_LOS_OBJECT_HAS_REFERENCES_NO_LOCK (obj, has_references) {
+               if (has_references)
                        callback ((mword)obj->data, sgen_los_object_size (obj));
-       }
+       } END_FOREACH_LOS_OBJECT_HAS_REFERENCES_NO_LOCK;
 }
 
 static guint8*
@@ -628,17 +694,23 @@ void
 sgen_los_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int job_index, int job_split_count)
 {
        LOSObject *obj;
-       int i = 0;
+       gboolean has_references;
+       int first_object, last_object, index;
+       int object_count = sgen_los_object_array_list.next_slot / job_split_count;
 
        sgen_binary_protocol_los_card_table_scan_start (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
-       for (obj = sgen_los_object_list; obj; obj = obj->next, i++) {
+
+       first_object = object_count * job_index;
+       if (job_index == job_split_count - 1)
+               last_object = sgen_los_object_array_list.next_slot;
+       else
+               last_object = object_count * (job_index + 1);
+
+       FOREACH_LOS_OBJECT_RANGE_HAS_REFERENCES_NO_LOCK (obj, first_object, last_object, index, has_references) {
                mword num_cards = 0;
                guint8 *cards;
 
-               if (i % job_split_count != job_index)
-                       continue;
-
-               if (!SGEN_OBJECT_HAS_REFERENCES (obj->data))
+               if (!has_references)
                        continue;
 
                if (scan_type & CARDTABLE_SCAN_MOD_UNION) {
@@ -668,7 +740,7 @@ sgen_los_scan_card_table (CardTableScanType scan_type, ScanCopyContext ctx, int
 
                if (scan_type == CARDTABLE_SCAN_MOD_UNION_PRECLEAN)
                        sgen_free_internal_dynamic (cards, num_cards, INTERNAL_MEM_CARDTABLE_MOD_UNION);
-       }
+       } END_FOREACH_LOS_OBJECT_RANGE_HAS_REFERENCES_NO_LOCK;
        sgen_binary_protocol_los_card_table_scan_end (sgen_timestamp (), scan_type & CARDTABLE_SCAN_MOD_UNION);
 }
 
@@ -676,16 +748,17 @@ void
 sgen_los_count_cards (long long *num_total_cards, long long *num_marked_cards)
 {
        LOSObject *obj;
+       gboolean has_references;
        long long total_cards = 0;
        long long marked_cards = 0;
 
-       for (obj = sgen_los_object_list; obj; obj = obj->next) {
+       FOREACH_LOS_OBJECT_HAS_REFERENCES_NO_LOCK (obj, has_references) {
                int i;
                guint8 *cards = sgen_card_table_get_card_scan_address ((mword) obj->data);
                guint8 *cards_end = sgen_card_table_get_card_scan_address ((mword) obj->data + sgen_los_object_size (obj) - 1);
                mword num_cards = (cards_end - cards) + 1;
 
-               if (!SGEN_OBJECT_HAS_REFERENCES (obj->data))
+               if (!has_references)
                        continue;
 
                total_cards += num_cards;
@@ -693,7 +766,7 @@ sgen_los_count_cards (long long *num_total_cards, long long *num_marked_cards)
                        if (cards [i])
                                ++marked_cards;
                }
-       }
+       } END_FOREACH_LOS_OBJECT_HAS_REFERENCES_NO_LOCK;
 
        *num_total_cards = total_cards;
        *num_marked_cards = marked_cards;
@@ -703,13 +776,13 @@ void
 sgen_los_update_cardtable_mod_union (void)
 {
        LOSObject *obj;
-
-       for (obj = sgen_los_object_list; obj; obj = obj->next) {
-               if (!SGEN_OBJECT_HAS_REFERENCES (obj->data))
+       gboolean has_references;
+       FOREACH_LOS_OBJECT_HAS_REFERENCES_NO_LOCK (obj, has_references) {
+               if (!has_references)
                        continue;
                sgen_card_table_update_mod_union (get_cardtable_mod_union_for_object (obj),
                                (char*)obj->data, sgen_los_object_size (obj), NULL);
-       }
+       } END_FOREACH_LOS_OBJECT_HAS_REFERENCES_NO_LOCK;
 }
 
 LOSObject*
@@ -726,6 +799,32 @@ sgen_los_pin_object (GCObject *data)
        sgen_binary_protocol_pin (data, (gpointer)SGEN_LOAD_VTABLE (data), sgen_safe_object_get_size (data));
 }
 
+void
+sgen_los_pin_objects (SgenGrayQueue *gray_queue, gboolean finish_concurrent_mode)
+{
+       LOSObject *obj;
+       FOREACH_LOS_OBJECT_NO_LOCK (obj) {
+               size_t dummy;
+               if (sgen_find_optimized_pin_queue_area ((char*)obj->data, (char*)obj->data + sgen_los_object_size (obj), &dummy, &dummy)) {
+                       sgen_binary_protocol_pin (obj->data, (gpointer)SGEN_LOAD_VTABLE (obj->data), sgen_safe_object_get_size (obj->data));
+
+                       if (sgen_los_object_is_pinned (obj->data)) {
+                               SGEN_ASSERT (0, finish_concurrent_mode == TRUE, "LOS objects can only be pinned here after concurrent marking.");
+                               continue;
+                       }
+                       sgen_los_pin_object (obj->data);
+                       if (SGEN_OBJECT_HAS_REFERENCES (obj->data))
+                               GRAY_OBJECT_ENQUEUE_SERIAL (gray_queue, obj->data, sgen_obj_get_descriptor ((GCObject*)obj->data));
+                       sgen_pin_stats_register_object (obj->data, GENERATION_OLD);
+                       SGEN_LOG (6, "Marked large object %p (%s) size: %lu from roots", obj->data,
+                                       sgen_client_vtable_get_name (SGEN_LOAD_VTABLE (obj->data)),
+                                       (unsigned long)sgen_los_object_size (obj));
+
+                       sgen_client_pinned_los_object (obj->data);
+               }
+       } END_FOREACH_LOS_OBJECT_NO_LOCK;
+}
+
 gboolean
 sgen_los_pin_object_par (GCObject *data)
 {