Implement card bundles for non-Windows platforms.
author Aditya Mandaleeka <adityam@microsoft.com>
Fri, 24 Feb 2017 18:23:43 +0000 (10:23 -0800)
committer Aditya Mandaleeka <adityam@microsoft.com>
Tue, 28 Feb 2017 23:52:34 +0000 (15:52 -0800)
Commit migrated from https://github.com/dotnet/coreclr/commit/9ae6ed25affc42343a3f88f147ff4a5fbd1e9727

15 files changed:
src/coreclr/clrdefinitions.cmake
src/coreclr/src/gc/gc.cpp
src/coreclr/src/gc/gc.h
src/coreclr/src/gc/gccommon.cpp
src/coreclr/src/gc/gcinterface.h
src/coreclr/src/gc/gcpriv.h
src/coreclr/src/vm/amd64/jithelpers_fast.S
src/coreclr/src/vm/amd64/jithelpers_fastwritebarriers.S
src/coreclr/src/vm/amd64/jithelpers_slow.S
src/coreclr/src/vm/amd64/jitinterfaceamd64.cpp
src/coreclr/src/vm/gcenv.ee.cpp
src/coreclr/src/vm/gcheaputilities.cpp
src/coreclr/src/vm/gcheaputilities.h
src/coreclr/src/vm/gchelpers.cpp
src/coreclr/src/vm/jitinterface.h

index a2b920c..417835a 100644 (file)
@@ -189,6 +189,7 @@ if(CLR_CMAKE_PLATFORM_UNIX_AMD64)
 endif (CLR_CMAKE_PLATFORM_UNIX_AMD64)
 add_definitions(-DFEATURE_USE_ASM_GC_WRITE_BARRIERS)
 if(CLR_CMAKE_PLATFORM_ARCH_AMD64 AND NOT WIN32)
+  add_definitions(-DFEATURE_MANUALLY_MANAGED_CARD_BUNDLES)
   add_definitions(-DFEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP)
 endif(CLR_CMAKE_PLATFORM_ARCH_AMD64 AND NOT WIN32)
 add_definitions(-DFEATURE_VERSIONING)
index 1edefba..5813072 100644 (file)
@@ -1458,7 +1458,11 @@ inline bool can_use_write_watch_for_gc_heap()
 
 inline bool can_use_write_watch_for_card_table()
 {
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    return true;
+#else
     return can_use_hardware_write_watch();
+#endif
 }
 
 #else
@@ -2181,15 +2185,22 @@ void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_b
     args.operation = WriteBarrierOp::StompResize;
     args.is_runtime_suspended = is_runtime_suspended;
     args.requires_upper_bounds_check = requires_upper_bounds_check;
+
     args.card_table = g_gc_card_table;
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    args.card_bundle_table = g_gc_card_bundle_table;
+#endif
+
     args.lowest_address = g_gc_lowest_address;
     args.highest_address = g_gc_highest_address;
+
 #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
     if (SoftwareWriteWatch::IsEnabledForGCHeap())
     {
         args.write_watch_table = g_gc_sw_ww_table;
     }
 #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+
     GCToEEInterface::StompWriteBarrier(&args);
 }
 
@@ -2210,6 +2221,11 @@ void stomp_write_barrier_initialize()
     args.is_runtime_suspended = true;
     args.requires_upper_bounds_check = false;
     args.card_table = g_gc_card_table;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    args.card_bundle_table = g_gc_card_bundle_table;
+#endif
+    
     args.lowest_address = g_gc_lowest_address;
     args.highest_address = g_gc_highest_address;
     args.ephemeral_low = reinterpret_cast<uint8_t*>(1);
@@ -6286,6 +6302,150 @@ void gc_heap::make_c_mark_list (uint8_t** arr)
 }
 #endif //BACKGROUND_GC
 
+
+#ifdef CARD_BUNDLE
+
+// The card bundle keeps track of groups of card words.
+static const size_t card_bundle_word_width = 32;
+
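+// Number of card words covered by a single card bundle bit. It is derived from the OS page
+// size so that one 32-bit card bundle word covers exactly one OS page of the card table.
+// How do we express the fact that 32 bits (card_word_width) is one uint32_t?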
+static const size_t card_bundle_size = (size_t)(OS_PAGE_SIZE / (sizeof(uint32_t)*card_bundle_word_width));
+
+inline
+size_t card_bundle_word (size_t cardb)
+{
+    return cardb / card_bundle_word_width;
+}
+
+inline
+uint32_t card_bundle_bit (size_t cardb)
+{
+    return (uint32_t)(cardb % card_bundle_word_width);
+}
+
+size_t align_cardw_on_bundle (size_t cardw)
+{
+    return ((size_t)(cardw + card_bundle_size - 1) & ~(card_bundle_size - 1 ));
+}
+
+// Get the card bundle representing a card word
+size_t cardw_card_bundle (size_t cardw)
+{
+    return cardw / card_bundle_size;
+}
+
+// Get the first card word in a card bundle
+size_t card_bundle_cardw (size_t cardb)
+{
+    return cardb * card_bundle_size;
+}
+
+// Clear the specified card bundle
+void gc_heap::card_bundle_clear (size_t cardb)
+{
+    card_bundle_table [card_bundle_word (cardb)] &= ~(1 << card_bundle_bit (cardb));
+    dprintf (1,("Cleared card bundle %Ix [%Ix, %Ix[", cardb, (size_t)card_bundle_cardw (cardb),
+              (size_t)card_bundle_cardw (cardb+1)));
+}
+
+void gc_heap::card_bundle_set (size_t cardb)
+{
+    if (!card_bundle_set_p (cardb))
+    {
+        card_bundle_table [card_bundle_word (cardb)] |= (1 << card_bundle_bit (cardb));
+    }
+}
+
+// Set the card bundle bits between start_cardb and end_cardb
+void gc_heap::card_bundles_set (size_t start_cardb, size_t end_cardb)
+{
+    if (start_cardb == end_cardb)
+    {
+        card_bundle_set(start_cardb);
+        return;
+    }
+
+    size_t start_word = card_bundle_word (start_cardb);
+    size_t end_word = card_bundle_word (end_cardb);
+
+    if (start_word < end_word)
+    {
+        // Set the partial words
+        card_bundle_table [start_word] |= highbits (~0u, card_bundle_bit (start_cardb));
+
+        if (card_bundle_bit (end_cardb))
+            card_bundle_table [end_word] |= lowbits (~0u, card_bundle_bit (end_cardb));
+
+        // Set the full words
+        for (size_t i = start_word + 1; i < end_word; i++)
+            card_bundle_table [i] = ~0u;
+    }
+    else
+    {
+        card_bundle_table [start_word] |= (highbits (~0u, card_bundle_bit (start_cardb)) &
+                                            lowbits (~0u, card_bundle_bit (end_cardb)));
+    }
+}
+
+// Indicates whether the specified bundle is set.
+BOOL gc_heap::card_bundle_set_p (size_t cardb)
+{
+    return (card_bundle_table[card_bundle_word(cardb)] & (1 << card_bundle_bit (cardb)));
+}
+
+// Returns the size (in bytes) of the card bundle table needed to cover the heap region from 'from' to 'end'
+size_t size_card_bundle_of (uint8_t* from, uint8_t* end)
+{
+    // Number of heap bytes represented by a card bundle word
+    size_t cbw_span = card_size * card_word_width * card_bundle_size * card_bundle_word_width;
+
+    // Align the start of the region down
+    from = (uint8_t*)((size_t)from & ~(cbw_span - 1));
+
+    // Align the end of the region up
+    end = (uint8_t*)((size_t)(end + (cbw_span - 1)) & ~(cbw_span - 1));
+
+    // Make sure they're really aligned
+    assert (((size_t)from & (cbw_span - 1)) == 0);
+    assert (((size_t)end  & (cbw_span - 1)) == 0);
+
+    return ((end - from) / cbw_span) * sizeof (uint32_t);
+}
+
+// Takes a pointer to a card bundle table and an address, and returns a pointer that represents
+// where a theoretical card bundle table that represents every address (starting from 0) would
+// start if the bundle word representing the address were to be located at the pointer passed in.
+// The returned 'translated' pointer makes it convenient/fast to calculate where the card bundle
+// for a given address is using a simple shift operation on the address.
+uint32_t* translate_card_bundle_table (uint32_t* cb, uint8_t* lowest_address)
+{
+    // The number of bytes of heap memory represented by a card bundle word
+    const size_t heap_bytes_for_bundle_word = card_size * card_word_width * card_bundle_size * card_bundle_word_width;
+
+    // Each card bundle word is 32 bits
+    return (uint32_t*)((uint8_t*)cb - (((size_t)lowest_address / heap_bytes_for_bundle_word) * sizeof (uint32_t)));
+}
+
+void gc_heap::enable_card_bundles ()
+{
+    if (can_use_write_watch_for_card_table() && (!card_bundles_enabled()))
+    {
+        dprintf (1, ("Enabling card bundles"));
+
+        // We initially set all of the card bundles
+        card_bundles_set (cardw_card_bundle (card_word (card_of (lowest_address))),
+                          cardw_card_bundle (align_cardw_on_bundle (card_word (card_of (highest_address)))));
+        settings.card_bundles = TRUE;
+    }
+}
+
+BOOL gc_heap::card_bundles_enabled ()
+{
+    return settings.card_bundles;
+}
+
+#endif // CARD_BUNDLE
+
 #if defined (_TARGET_AMD64_)
 #define brick_size ((size_t)4096)
 #else
@@ -6415,8 +6575,18 @@ void gc_heap::clear_card (size_t card)
 inline
 void gc_heap::set_card (size_t card)
 {
-    card_table [card_word (card)] =
-        (card_table [card_word (card)] | (1 << card_bit (card)));
+    size_t word = card_word (card);
+    card_table[word] = (card_table [word] | (1 << card_bit (card)));
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    // Also set the card bundle that corresponds to the card
+    size_t bundle_to_set = cardw_card_bundle(word);
+
+    card_bundle_set(bundle_to_set);
+
+    dprintf (3,("Set card %Ix [%Ix, %Ix[ and bundle %Ix", card, (size_t)card_address (card), (size_t)card_address (card+1), bundle_to_set));
+    assert(card_bundle_set_p(bundle_to_set) != 0);
+#endif
 }
 
 inline
@@ -6439,116 +6609,6 @@ size_t size_card_of (uint8_t* from, uint8_t* end)
     return count_card_of (from, end) * sizeof(uint32_t);
 }
 
-#ifdef CARD_BUNDLE
-
-//The card bundle keeps track of groups of card words
-#define card_bundle_word_width ((size_t)32)
-//how do we express the fact that 32 bits (card_word_width) is one uint32_t?
-#define card_bundle_size ((size_t)(OS_PAGE_SIZE/(sizeof (uint32_t)*card_bundle_word_width)))
-
-inline
-size_t card_bundle_word (size_t cardb)
-{
-    return cardb / card_bundle_word_width;
-}
-
-inline
-uint32_t card_bundle_bit (size_t cardb)
-{
-    return (uint32_t)(cardb % card_bundle_word_width);
-}
-
-size_t align_cardw_on_bundle (size_t cardw)
-{
-    return ((size_t)(cardw + card_bundle_size - 1) & ~(card_bundle_size - 1 ));
-}
-
-size_t cardw_card_bundle (size_t cardw)
-{
-    return cardw/card_bundle_size;
-}
-
-size_t card_bundle_cardw (size_t cardb)
-{
-    return cardb*card_bundle_size;
-}
-
-void gc_heap::card_bundle_clear(size_t cardb)
-{
-    card_bundle_table [card_bundle_word (cardb)] &= ~(1 << card_bundle_bit (cardb));
-    dprintf (3,("Cleared card bundle %Ix [%Ix, %Ix[", cardb, (size_t)card_bundle_cardw (cardb),
-              (size_t)card_bundle_cardw (cardb+1)));
-//    printf ("Cleared card bundle %Ix\n", cardb);
-}
-
-void gc_heap::card_bundles_set (size_t start_cardb, size_t end_cardb)
-{
-    size_t start_word = card_bundle_word (start_cardb);
-    size_t end_word = card_bundle_word (end_cardb);
-    if (start_word < end_word)
-    {
-        //set the partial words
-        card_bundle_table [start_word] |= highbits (~0u, card_bundle_bit (start_cardb));
-
-        if (card_bundle_bit (end_cardb))
-            card_bundle_table [end_word] |= lowbits (~0u, card_bundle_bit (end_cardb));
-
-        for (size_t i = start_word+1; i < end_word; i++)
-            card_bundle_table [i] = ~0u;
-
-    }
-    else
-    {
-        card_bundle_table [start_word] |= (highbits (~0u, card_bundle_bit (start_cardb)) &
-                                           lowbits (~0u, card_bundle_bit (end_cardb)));
-
-    }
-
-}
-
-BOOL gc_heap::card_bundle_set_p (size_t cardb)
-{
-    return ( card_bundle_table [ card_bundle_word (cardb) ] & (1 << card_bundle_bit (cardb)));
-}
-
-size_t size_card_bundle_of (uint8_t* from, uint8_t* end)
-{
-    //align from to lower
-    from = (uint8_t*)((size_t)from & ~(card_size*card_word_width*card_bundle_size*card_bundle_word_width - 1));
-    //align to to upper
-    end = (uint8_t*)((size_t)(end + (card_size*card_word_width*card_bundle_size*card_bundle_word_width - 1)) &
-                  ~(card_size*card_word_width*card_bundle_size*card_bundle_word_width - 1));
-
-    assert (((size_t)from & ((card_size*card_word_width*card_bundle_size*card_bundle_word_width)-1)) == 0);
-    assert (((size_t)end  & ((card_size*card_word_width*card_bundle_size*card_bundle_word_width)-1)) == 0);
-
-    return ((end - from) / (card_size*card_word_width*card_bundle_size*card_bundle_word_width)) * sizeof (uint32_t);
-}
-
-uint32_t* translate_card_bundle_table (uint32_t* cb)
-{
-    return (uint32_t*)((uint8_t*)cb - ((((size_t)g_gc_lowest_address) / (card_size*card_word_width*card_bundle_size*card_bundle_word_width)) * sizeof (uint32_t)));
-}
-
-void gc_heap::enable_card_bundles ()
-{
-    if (can_use_write_watch_for_card_table() && (!card_bundles_enabled()))
-    {
-        dprintf (3, ("Enabling card bundles"));
-        //set all of the card bundles
-        card_bundles_set (cardw_card_bundle (card_word (card_of (lowest_address))),
-                          cardw_card_bundle (align_cardw_on_bundle (card_word (card_of (highest_address)))));
-        settings.card_bundles = TRUE;
-    }
-}
-
-BOOL gc_heap::card_bundles_enabled ()
-{
-    return settings.card_bundles;
-}
-
-#endif //CARD_BUNDLE
-
 // We don't store seg_mapping_table in card_table_info because there's only always one view.
 class card_table_info
 {
@@ -6871,6 +6931,10 @@ void release_card_table (uint32_t* c_table)
             if (&g_gc_card_table[card_word (gcard_of(g_gc_lowest_address))] == c_table)
             {
                 g_gc_card_table = 0;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+                g_gc_card_bundle_table = 0;
+#endif
 #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
                 SoftwareWriteWatch::StaticClose();
 #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
@@ -6919,8 +6983,12 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end)
 #ifdef CARD_BUNDLE
     if (can_use_write_watch_for_card_table())
     {
-        virtual_reserve_flags |= VirtualReserveFlags::WriteWatch;
         cb = size_card_bundle_of (g_gc_lowest_address, g_gc_highest_address);
+#ifndef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        // If we're not manually managing the card bundles, we will need to use OS write
+        // watch APIs over this region to track changes.
+        virtual_reserve_flags |= VirtualReserveFlags::WriteWatch;
+#endif
     }
 #endif //CARD_BUNDLE
 
@@ -6979,6 +7047,11 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end)
 
 #ifdef CARD_BUNDLE
     card_table_card_bundle_table (ct) = (uint32_t*)((uint8_t*)card_table_brick_table (ct) + bs);
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    g_gc_card_bundle_table = translate_card_bundle_table(card_table_card_bundle_table(ct), g_gc_lowest_address);
+#endif
+
 #endif //CARD_BUNDLE
 
 #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
@@ -7089,6 +7162,11 @@ int gc_heap::grow_brick_card_tables (uint8_t* start,
         bool write_barrier_updated = false;
         uint32_t virtual_reserve_flags = VirtualReserveFlags::None;
         uint32_t* saved_g_card_table = g_gc_card_table;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        uint32_t* saved_g_card_bundle_table = g_gc_card_bundle_table;
+#endif
+
         uint32_t* ct = 0;
         uint32_t* translated_ct = 0;
         short* bt = 0;
@@ -7109,8 +7187,13 @@ int gc_heap::grow_brick_card_tables (uint8_t* start,
 #ifdef CARD_BUNDLE
         if (can_use_write_watch_for_card_table())
         {
-            virtual_reserve_flags = VirtualReserveFlags::WriteWatch;
             cb = size_card_bundle_of (saved_g_lowest_address, saved_g_highest_address);
+
+#ifndef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            // If we're not manually managing the card bundles, we will need to use OS write
+            // watch APIs over this region to track changes.
+            virtual_reserve_flags |= VirtualReserveFlags::WriteWatch;
+#endif
         }
 #endif //CARD_BUNDLE
 
@@ -7272,6 +7355,11 @@ int gc_heap::grow_brick_card_tables (uint8_t* start,
             }
 
             g_gc_card_table = translated_ct;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            g_gc_card_bundle_table = translate_card_bundle_table(card_table_card_bundle_table(ct), saved_g_lowest_address);
+#endif
+
             SoftwareWriteWatch::SetResizedUntranslatedTable(
                 mem + sw_ww_table_offset,
                 saved_g_lowest_address,
@@ -7296,6 +7384,10 @@ int gc_heap::grow_brick_card_tables (uint8_t* start,
 #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
         {
             g_gc_card_table = translated_ct;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            g_gc_card_bundle_table = translate_card_bundle_table(card_table_card_bundle_table(ct), saved_g_lowest_address);
+#endif
         }
 
         seg_mapping_table = new_seg_mapping_table;
@@ -7327,6 +7419,10 @@ fail:
         {
             assert(g_gc_card_table == saved_g_card_table);
 
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            assert(g_gc_card_bundle_table  == saved_g_card_bundle_table);
+#endif
+
             //delete (uint32_t*)((uint8_t*)ct - sizeof(card_table_info));
             if (!GCToOSInterface::VirtualRelease (mem, alloc_size_aligned))
             {
@@ -7430,12 +7526,23 @@ void gc_heap::copy_brick_card_range (uint8_t* la, uint32_t* old_card_table,
             (card_table_lowest_address (ct) <= start))
         {
             // or the card_tables
-            uint32_t* dest = &card_table [card_word (card_of (start))];
-            uint32_t* src = &((translate_card_table (ct)) [card_word (card_of (start))]);
+
+            size_t start_word = card_word (card_of (start));
+
+            uint32_t* dest = &card_table[start_word];
+            uint32_t* src = &((translate_card_table (ct))[start_word]);
             ptrdiff_t count = count_card_of (start, end);
             for (int x = 0; x < count; x++)
             {
                 *dest |= *src;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+                if (*src != 0)
+                {
+                    card_bundle_set(cardw_card_bundle(start_word+x));
+                }
+#endif
+
                 dest++;
                 src++;
             }
@@ -7511,7 +7618,10 @@ void gc_heap::copy_brick_card_table()
     size_t st = 0;
 #endif //GROWABLE_SEG_MAPPING_TABLE
 #endif //MARK_ARRAY && _DEBUG
-    card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct));
+    card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct), g_gc_lowest_address);
+
+    // Ensure that the word that represents g_gc_lowest_address in the translated table is located at the
+    // start of the untranslated table.
     assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of (g_gc_lowest_address))))] ==
             card_table_card_bundle_table (ct));
 
@@ -9333,89 +9443,112 @@ static unsigned int tot_cycles = 0;
 
 #ifdef CARD_BUNDLE
 
+inline void gc_heap::verify_card_bundle_bits_set(size_t first_card_word, size_t last_card_word)
+{
+#ifdef _DEBUG
+    for (size_t x = cardw_card_bundle (first_card_word); x < cardw_card_bundle (last_card_word); x++)
+    {
+        if (!card_bundle_set_p (x))
+        {
+            assert (!"Card bundle not set");
+            dprintf (3, ("Card bundle %Ix not set", x));
+        }
+    }
+#endif
+}
+
+// Verifies that any bundles that are not set represent only cards that are not set.
+inline void gc_heap::verify_card_bundles()
+{
+#ifdef _DEBUG
+    size_t lowest_card = card_word (card_of (lowest_address));
+    size_t highest_card = card_word (card_of (highest_address));
+    size_t cardb = cardw_card_bundle (lowest_card);
+    size_t end_cardb = cardw_card_bundle (align_cardw_on_bundle (highest_card));
+
+    while (cardb < end_cardb)
+    {
+        uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb), lowest_card)];
+        uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb+1), highest_card)];
+
+        if (card_bundle_set_p (cardb) == 0)
+        {
+            // Verify that no card is set
+            while (card_word < card_word_end)
+            {
+                if (*card_word != 0)
+                {
+                    dprintf  (3, ("gc: %d, Card word %Ix for address %Ix set, card_bundle %Ix clear",
+                            dd_collection_count (dynamic_data_of (0)), 
+                            (size_t)(card_word-&card_table[0]),
+                            (size_t)(card_address ((size_t)(card_word-&card_table[0]) * card_word_width)), cardb));
+                }
+
+                assert((*card_word)==0);
+                card_word++;
+            }
+        }
+
+        cardb++;
+    }
+#endif
+}
+
+// If card bundles are enabled, use write watch to find pages in the card table that have 
+// been dirtied, and set the corresponding card bundle bits.
 void gc_heap::update_card_table_bundle()
 {
     if (card_bundles_enabled())
     {
+        // The address of the card word containing the card representing the lowest heap address
         uint8_t* base_address = (uint8_t*)(&card_table[card_word (card_of (lowest_address))]);
+
+        // The address of the card word containing the card representing the highest heap address
+        uint8_t* high_address = (uint8_t*)(&card_table[card_word (card_of (highest_address))]);
+        
         uint8_t* saved_base_address = base_address;
         uintptr_t bcount = array_size;
-        uint8_t* high_address = (uint8_t*)(&card_table[card_word (card_of (highest_address))]);
         size_t saved_region_size = align_on_page (high_address) - saved_base_address;
 
         do
         {
             size_t region_size = align_on_page (high_address) - base_address;
+
             dprintf (3,("Probing card table pages [%Ix, %Ix[", (size_t)base_address, (size_t)base_address+region_size));
-            bool success = GCToOSInterface::GetWriteWatch (false /* resetState */ , base_address, region_size,
-                                                           (void**)g_addresses,
-                                                           &bcount);
+            bool success = GCToOSInterface::GetWriteWatch(false /* resetState */,
+                                                          base_address,
+                                                          region_size,
+                                                          (void**)g_addresses,
+                                                          &bcount);
             assert (success && "GetWriteWatch failed!");
+
             dprintf (3,("Found %d pages written", bcount));
-            for (unsigned  i = 0; i < bcount; i++)
+            for (unsigned i = 0; i < bcount; i++)
             {
+                // Index (in card words) of the start of the dirty page within the card table (clamped to base_address)
                 size_t bcardw = (uint32_t*)(max(g_addresses[i],base_address)) - &card_table[0];
+
+                // Index (in card words) of the end of the dirty page within the card table (clamped to high_address)
                 size_t ecardw = (uint32_t*)(min(g_addresses[i]+OS_PAGE_SIZE, high_address)) - &card_table[0];
                 assert (bcardw >= card_word (card_of (g_gc_lowest_address)));
 
-                card_bundles_set (cardw_card_bundle (bcardw),
-                                  cardw_card_bundle (align_cardw_on_bundle (ecardw)));
-
-                dprintf (3,("Set Card bundle [%Ix, %Ix[",
-                            cardw_card_bundle (bcardw), cardw_card_bundle (align_cardw_on_bundle (ecardw))));
-
-#ifdef _DEBUG
-                for (size_t x = cardw_card_bundle (bcardw); x < cardw_card_bundle (ecardw); x++)
-                {
-                    if (!card_bundle_set_p (x))
-                    {
-                        assert (!"Card bundle not set");
-                        dprintf (3, ("Card bundle %Ix not set", x));
-                    }
-                }
-#endif //_DEBUG
+                // Set the card bundle bits representing the dirty card table page
+                card_bundles_set (cardw_card_bundle (bcardw), cardw_card_bundle (align_cardw_on_bundle (ecardw)));
+                dprintf (3,("Set Card bundle [%Ix, %Ix[", cardw_card_bundle (bcardw), cardw_card_bundle (align_cardw_on_bundle (ecardw))));
 
+                verify_card_bundle_bits_set(bcardw, ecardw);
             }
-            if (bcount >= array_size){
+
+            if (bcount >= array_size)
+            {
                 base_address = g_addresses [array_size-1] + OS_PAGE_SIZE;
                 bcount = array_size;
             }
+
         } while ((bcount >= array_size) && (base_address < high_address));
 
+        // Now that we've updated the card bundle bits, reset the write-tracking state. 
         GCToOSInterface::ResetWriteWatch (saved_base_address, saved_region_size);
-
-#ifdef _DEBUG
-
-        size_t lowest_card = card_word (card_of (lowest_address));
-        size_t highest_card = card_word (card_of (highest_address));
-        size_t cardb = cardw_card_bundle (lowest_card);
-        size_t end_cardb = cardw_card_bundle (align_cardw_on_bundle (highest_card));
-
-        //find a non null bundle
-        while (cardb < end_cardb)
-        {
-            if (card_bundle_set_p (cardb)==0)
-            {
-                //verify that the cards are indeed empty
-                uint32_t* card_word = &card_table[max(card_bundle_cardw (cardb), lowest_card)];
-                uint32_t* card_word_end = &card_table[min(card_bundle_cardw (cardb+1), highest_card)];
-                while (card_word < card_word_end)
-                {
-                    if ((*card_word) != 0)
-                    {
-                        dprintf  (3, ("gc: %d, Card word %Ix for address %Ix set, card_bundle %Ix clear",
-                                dd_collection_count (dynamic_data_of (0)), 
-                                (size_t)(card_word-&card_table[0]),
-                                (size_t)(card_address ((size_t)(card_word-&card_table[0]) * card_word_width)), cardb));
-                    }
-                    assert((*card_word)==0);
-                    card_word++;
-                }
-            }
-            //end of verification
-            cardb++;
-        }
-#endif //_DEBUG
     }
 }
 #endif //CARD_BUNDLE
@@ -10338,7 +10471,7 @@ gc_heap::init_gc_heap (int  h_number)
     lowest_address = card_table_lowest_address (ct);
 
 #ifdef CARD_BUNDLE
-    card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct));
+    card_bundle_table = translate_card_bundle_table (card_table_card_bundle_table (ct), g_gc_lowest_address);
     assert (&card_bundle_table [card_bundle_word (cardw_card_bundle (card_word (card_of (g_gc_lowest_address))))] ==
             card_table_card_bundle_table (ct));
 #endif //CARD_BUNDLE
@@ -15211,6 +15344,10 @@ void gc_heap::gc1()
 
     assert (g_gc_card_table == card_table);
 
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    assert (g_gc_card_bundle_table == card_bundle_table);
+#endif    
+
     {
         if (n == max_generation)
         {
@@ -18598,6 +18735,12 @@ void gc_heap::fix_card_table ()
                 dprintf (2,("Set Cards [%Ix:%Ix, %Ix:%Ix[",
                       card_of (g_addresses [i]), (size_t)g_addresses [i],
                       card_of (g_addresses [i]+OS_PAGE_SIZE), (size_t)g_addresses [i]+OS_PAGE_SIZE));
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    // We don't need to update card bundles here because this function is only used when
+    // we don't have write barriers.
+    #error Cannot have manually managed card bundles without write barriers.
+#endif
             }
 
             if (bcount >= array_size){
@@ -19536,7 +19679,15 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
             {
 #endif //MULTIPLE_HEAPS
 
-                update_card_table_bundle ();
+#ifndef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+                // If we are manually managing card bundles, every write to the card table should already be
+                // accounted for in the card bundle table so there's nothing to update here.
+                update_card_table_bundle();
+#endif
+                if (card_bundles_enabled())
+                {
+                    verify_card_bundles();
+                }
 
 #ifdef MULTIPLE_HEAPS
                 gc_t_join.r_restart();
@@ -27019,6 +27170,14 @@ void gc_heap::copy_cards (size_t dst_card,
         if (!(++dstbit % 32))
         {
             card_table[dstwrd] = dsttmp;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            if (dsttmp != 0)
+            {
+                card_bundle_set(cardw_card_bundle(dstwrd));
+            }
+#endif
+
             dstwrd++;
             dsttmp = card_table[dstwrd];
             dstbit = 0;
@@ -27026,6 +27185,13 @@ void gc_heap::copy_cards (size_t dst_card,
     }
 
     card_table[dstwrd] = dsttmp;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    if (dsttmp != 0)
+    {
+        card_bundle_set(cardw_card_bundle(dstwrd));
+    }
+#endif
 }
 
 void gc_heap::copy_cards_for_addresses (uint8_t* dest, uint8_t* src, size_t len)
@@ -27086,6 +27252,10 @@ void gc_heap::copy_cards_for_addresses (uint8_t* dest, uint8_t* src, size_t len)
 
     if (card_set_p (card_of (src + len - 1)))
         set_card (end_dest_card);
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    card_bundles_set(cardw_card_bundle(card_word(card_of(dest))), cardw_card_bundle(align_cardw_on_bundle(card_word(end_dest_card))));
+#endif
 }
 
 #ifdef BACKGROUND_GC
@@ -33336,6 +33506,10 @@ HRESULT GCHeap::Shutdown ()
     {
         destroy_card_table (ct);
         g_gc_card_table = nullptr;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        g_gc_card_bundle_table = nullptr;
+#endif
 #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
         SoftwareWriteWatch::StaticClose();
 #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
index 7332e42..8e11dd2 100644 (file)
@@ -137,6 +137,10 @@ class DacHeapWalker;
 
 #define MP_LOCKS
 
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+extern "C" uint32_t* g_gc_card_bundle_table;
+#endif
+
 extern "C" uint32_t* g_gc_card_table;
 extern "C" uint8_t* g_gc_lowest_address;
 extern "C" uint8_t* g_gc_highest_address;
index 133f05e..80907a2 100644 (file)
@@ -39,6 +39,11 @@ uint8_t* g_shadow_lowest_address = NULL;
 #endif
 
 uint32_t* g_gc_card_table;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+uint32_t* g_gc_card_bundle_table;
+#endif
+
 uint8_t* g_gc_lowest_address  = 0;
 uint8_t* g_gc_highest_address = 0;
 bool g_fFinalizerRunOnShutDown = false;
index 99d79df..d67e561 100644 (file)
@@ -77,6 +77,10 @@ struct WriteBarrierParameters
     // card table. Used for WriteBarrierOp::Initialize and WriteBarrierOp::StompResize.
     uint32_t* card_table;
 
+    // The new card bundle table location. May or may not be the same as the previous
+    // card bundle table. Used for WriteBarrierOp::Initialize and WriteBarrierOp::StompResize.
+    uint32_t* card_bundle_table;
+
     // The heap's new low boundary. May or may not be the same as the previous
     // value. Used for WriteBarrierOp::Initialize and WriteBarrierOp::StompResize.
     uint8_t* lowest_address;
index 056b383..dc857c0 100644 (file)
@@ -1718,8 +1718,14 @@ protected:
     PER_HEAP
     void card_bundle_clear(size_t cardb);
     PER_HEAP
+    void card_bundle_set (size_t cardb);
+    PER_HEAP
     void card_bundles_set (size_t start_cardb, size_t end_cardb);
     PER_HEAP
+    void verify_card_bundle_bits_set(size_t first_card_word, size_t last_card_word);
+    PER_HEAP
+    void verify_card_bundles();
+    PER_HEAP
     BOOL card_bundle_set_p (size_t cardb);
     PER_HEAP
     BOOL find_card_dword (size_t& cardw, size_t cardw_end);
index 8076655..975e488 100644 (file)
@@ -119,6 +119,22 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
 
     UpdateCardTable:
         mov     byte ptr [rdi + rax], 0FFh
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        NOP_2_BYTE // padding for alignment of constant
+        shr     rdi, 0x0A
+
+        movabs  rax, 0xF0F0F0F0F0F0F0F0
+        cmp     byte ptr [rdi + rax], 0FFh
+
+        .byte 0x75, 0x02
+        // jne     UpdateCardBundle_WriteWatch_PostGrow64
+        REPRET
+
+    UpdateCardBundle_WriteWatch_PostGrow64:
+        mov     byte ptr [rdi + rax], 0FFh
+#endif
+
         ret
 
     .balign 16
@@ -163,12 +179,32 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT
         // Touch the card table entry, if not already dirty.
         shr     rdi, 0Bh
         cmp     byte ptr [rdi + rax], 0FFh
-        // jne     UpdateCardTable
         .byte 0x75, 0x02
+        // jne     UpdateCardTable
         REPRET
 
     UpdateCardTable:
         mov     byte ptr [rdi + rax], 0FFh
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        NOP_3_BYTE // padding for alignment of constant
+        NOP_3_BYTE // padding for alignment of constant
+
+        movabs  rax, 0xF0F0F0F0F0F0F0F0 // NOTE(review): presumably a placeholder for the card bundle table address - confirm
+
+        // Touch the card bundle, if not already dirty.
+        // rdi is already shifted by 0xB, so shift by 0xA more
+        shr     rdi, 0x0A
+        cmp     byte ptr [rdi + rax], 0FFh
+
+        .byte 0x75, 0x02 
+        // jne     UpdateCardBundle (hand-encoded above; the displacement of 2 steps over REPRET)
+        REPRET
+
+    UpdateCardBundle:
+        mov     byte ptr [rdi + rax], 0FFh // mark the card bundle byte dirty
+#endif
+
         ret
 
     .balign 16
@@ -303,16 +339,36 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT
         // Check if we need to update the card table
         // Calc pCardByte
         shr     rcx, 0x0B
+        
         PREPARE_EXTERNAL_VAR g_card_table, rax
-        add     rcx, [rax]
+        mov     rax, [rax]
 
         // Check if this card is dirty
-        cmp     byte ptr [rcx], 0FFh
+        cmp     byte ptr [rcx + rax], 0FFh
+
         jne     UpdateCardTable_ByRefWriteBarrier
         REPRET
 
     UpdateCardTable_ByRefWriteBarrier:
+        mov     byte ptr [rcx + rax], 0FFh
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already)
+        shr     rcx, 0x0A
+        
+        PREPARE_EXTERNAL_VAR g_card_bundle_table, rax
+        add     rcx, [rax] // rcx now addresses the card bundle byte inside the table
+
+        // Check if this bundle byte is dirty
+        cmp     byte ptr [rcx], 0FFh
+
+        jne     UpdateCardBundle_ByRefWriteBarrier
+        REPRET // bundle byte is already 0xFF, nothing left to write
+
+    UpdateCardBundle_ByRefWriteBarrier:
         mov     byte ptr [rcx], 0FFh
+#endif
+
         ret
 
     .balign 16
index 6d61b26..eff33cd 100644 (file)
@@ -23,7 +23,12 @@ PATCH_LABEL JIT_WriteBarrier_PreGrow64_Patch_Label_Lower
 
         // Check the lower ephemeral region bound.
         cmp     rsi, rax
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        .byte 0x72, 0x4B
+#else
         .byte 0x72, 0x23
+#endif
         // jb      Exit_PreGrow64
 
         nop // padding for alignment of constant
@@ -40,6 +45,27 @@ PATCH_LABEL JIT_WriteBarrier_PreGrow64_Patch_Label_CardTable
 
     UpdateCardTable_PreGrow64:
         mov     byte ptr [rdi + rax], 0FFh
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        NOP_3_BYTE // padding for alignment of constant
+        NOP_3_BYTE // padding for alignment of constant (Validate() asserts the immediate is 8-byte aligned)
+
+PATCH_LABEL JIT_WriteBarrier_PreGrow64_Patch_Label_CardBundleTable
+        movabs  rax, 0xF0F0F0F0F0F0F0F0 // placeholder immediate; WriteBarrierManager locates the patch site through this label (offset 2) and stores g_card_bundle_table there
+
+        // Touch the card bundle, if not already dirty.
+        // rdi is already shifted by 0xB, so shift by 0xA more
+        shr     rdi, 0x0A
+        cmp     byte ptr [rdi + rax], 0FFh
+
+        .byte 0x75, 0x02 
+        // jne     UpdateCardBundle_PreGrow64 (hand-encoded above; the displacement of 2 steps over REPRET)
+        REPRET
+
+    UpdateCardBundle_PreGrow64:
+        mov     byte ptr [rdi + rax], 0FFh // mark the card bundle byte dirty
+#endif
+
         ret
 
     .balign 16
@@ -69,7 +95,12 @@ PATCH_LABEL JIT_WriteBarrier_PostGrow64_Patch_Label_Lower
 
         // Check the lower and upper ephemeral region bounds
         cmp     rsi, rax
-        .byte 0x72,0x33
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        .byte 0x72, 0x53
+#else
+        .byte 0x72, 0x33
+#endif
         // jb      Exit_PostGrow64
 
         nop // padding for alignment of constant
@@ -78,7 +109,12 @@ PATCH_LABEL JIT_WriteBarrier_PostGrow64_Patch_Label_Upper
         movabs  r8, 0xF0F0F0F0F0F0F0F0
 
         cmp     rsi, r8
-        .byte 0x73,0x23
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        .byte 0x73, 0x43
+#else
+        .byte 0x73, 0x23
+#endif
         // jae     Exit_PostGrow64
 
         nop // padding for alignment of constant
@@ -95,6 +131,27 @@ PATCH_LABEL JIT_WriteBarrier_PostGrow64_Patch_Label_CardTable
 
     UpdateCardTable_PostGrow64:
         mov     byte ptr [rdi + rax], 0FFh
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        NOP_3_BYTE // padding for alignment of constant
+        NOP_3_BYTE // padding for alignment of constant (Validate() asserts the immediate is 8-byte aligned)
+
+PATCH_LABEL JIT_WriteBarrier_PostGrow64_Patch_Label_CardBundleTable
+        movabs  rax, 0xF0F0F0F0F0F0F0F0 // placeholder immediate; WriteBarrierManager locates the patch site through this label (offset 2) and stores g_card_bundle_table there
+
+        // Touch the card bundle, if not already dirty.
+        // rdi is already shifted by 0xB, so shift by 0xA more
+        shr     rdi, 0x0A
+        cmp     byte ptr [rdi + rax], 0FFh
+
+        .byte 0x75, 0x02 
+        // jne     UpdateCardBundle_PostGrow64 (hand-encoded above; the displacement of 2 steps over REPRET)
+        REPRET
+
+    UpdateCardBundle_PostGrow64:
+        mov     byte ptr [rdi + rax], 0FFh // mark the card bundle byte dirty
+#endif
+
         ret
 
     .balign 16
@@ -134,7 +191,28 @@ PATCH_LABEL JIT_WriteBarrier_SVR64_PatchLabel_CardTable
 
     UpdateCardTable_SVR64:
         mov     byte ptr [rdi + rax], 0FFh
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        NOP_3_BYTE // padding for alignment of constant
+        NOP_3_BYTE // padding for alignment of constant (Validate() asserts the immediate is 8-byte aligned)
+
+PATCH_LABEL JIT_WriteBarrier_SVR64_PatchLabel_CardBundleTable
+        movabs  rax, 0xF0F0F0F0F0F0F0F0 // placeholder immediate; WriteBarrierManager locates the patch site through this label (offset 2) and stores g_card_bundle_table there
+
+        // Shift the address by 0xA more since already shifted by 0xB
+        shr     rdi, 0x0A
+        cmp     byte ptr [rdi + rax], 0FFh
+
+        .byte 0x75, 0x02 
+        // jne     UpdateCardBundle_SVR64 (hand-encoded above; the displacement of 2 steps over REPRET)
+        REPRET
+
+    UpdateCardBundle_SVR64:
+        mov     byte ptr [rdi + rax], 0FFh // mark the card bundle byte dirty
+#endif
+
         ret
+
 LEAF_END_MARKED JIT_WriteBarrier_SVR64, _TEXT
 
 #endif
@@ -174,7 +252,13 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_Lower
     CheckCardTable_WriteWatch_PreGrow64:
         // Check the lower ephemeral region bound.
         cmp     rsi, r11
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        .byte 0x72, 0x40
+#else
         .byte 0x72, 0x20
+#endif
+
         // jb      Exit_WriteWatch_PreGrow64
 
         // Touch the card table entry, if not already dirty.
@@ -189,6 +273,23 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardTable
 
     UpdateCardTable_WriteWatch_PreGrow64:
         mov     byte ptr [rdi + rax], 0FFh
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        NOP_2_BYTE // padding for alignment of constant (Validate() asserts the immediate is 8-byte aligned)
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardBundleTable
+        movabs  rax, 0xF0F0F0F0F0F0F0F0 // placeholder immediate; WriteBarrierManager locates the patch site through this label (offset 2) and stores g_card_bundle_table there
+
+        shr     rdi, 0x0A // shift further so rdi indexes the card bundle byte
+        cmp     byte ptr [rdi + rax], 0FFh // touch the card bundle only if it is not already dirty
+
+        .byte 0x75, 0x02
+        // jne     UpdateCardBundle_WriteWatch_PreGrow64 (hand-encoded above; the displacement of 2 steps over REPRET)
+        REPRET
+
+    UpdateCardBundle_WriteWatch_PreGrow64:
+        mov     byte ptr [rdi + rax], 0FFh // mark the card bundle byte dirty
+#endif
+
         ret
 
     .balign 16
@@ -231,7 +332,12 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Lower
         // Check the lower and upper ephemeral region bounds
     CheckCardTable_WriteWatch_PostGrow64:
         cmp     rsi, r11
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        .byte 0x72, 0x55
+#else
         .byte 0x72, 0x3d
+#endif
         // jb      Exit_WriteWatch_PostGrow64
 
         NOP_3_BYTE // padding for alignment of constant
@@ -240,7 +346,12 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Upper
         movabs  r10, 0xF0F0F0F0F0F0F0F0
 
         cmp     rsi, r10
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        .byte 0x73, 0x43
+#else
         .byte 0x73, 0x2b
+#endif
         // jae     Exit_WriteWatch_PostGrow64
 
         nop // padding for alignment of constant
@@ -257,8 +368,24 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardTable
 
     UpdateCardTable_WriteWatch_PostGrow64:
         mov     byte ptr [rdi + rax], 0FFh
-        ret
 
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        NOP_2_BYTE // padding for alignment of constant (Validate() asserts the immediate is 8-byte aligned)
+        shr     rdi, 0x0A // shift further so rdi indexes the card bundle byte
+
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardBundleTable
+        movabs  rax, 0xF0F0F0F0F0F0F0F0 // placeholder immediate; WriteBarrierManager locates the patch site through this label (offset 2) and stores g_card_bundle_table there
+        cmp     byte ptr [rdi + rax], 0FFh // touch the card bundle only if it is not already dirty
+
+        .byte 0x75, 0x02
+        // jne     UpdateCardBundle_WriteWatch_PostGrow64 (hand-encoded above; the displacement of 2 steps over REPRET)
+        REPRET
+
+    UpdateCardBundle_WriteWatch_PostGrow64:
+        mov     byte ptr [rdi + rax], 0FFh // mark the card bundle byte dirty
+#endif
+
+        ret
     .balign 16
     Exit_WriteWatch_PostGrow64:
         REPRET
@@ -279,7 +406,7 @@ LEAF_ENTRY JIT_WriteBarrier_WriteWatch_SVR64, _TEXT
         //
         // SVR GC has multiple heaps, so it cannot provide one single 
         // ephemeral region to bounds check against, so we just skip the
-        // bounds checking altogether and do our card table update 
+        // bounds checking altogether and do our card table update 
         // unconditionally.
         //
 
@@ -312,7 +439,25 @@ PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardTable
 
     UpdateCardTable_WriteWatch_SVR64:
         mov     byte ptr [rdi + r11], 0FFh
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        NOP // padding for alignment of constant (Validate() asserts the immediate is 8-byte aligned)
+
+PATCH_LABEL JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardBundleTable
+        movabs  r11, 0xF0F0F0F0F0F0F0F0 // placeholder immediate; WriteBarrierManager locates the patch site through this label (offset 2) and stores g_card_bundle_table there
+
+        shr     rdi, 0x0A // shift further so rdi indexes the card bundle byte
+        cmp     byte ptr [rdi + r11], 0FFh // touch the card bundle only if it is not already dirty
+        .byte 0x75, 0x02
+        // jne     UpdateCardBundle_WriteWatch_SVR64 (hand-encoded above; the displacement of 2 steps over REPRET)
+        REPRET
+
+    UpdateCardBundle_WriteWatch_SVR64:
+        mov     byte ptr [rdi + r11], 0FFh // mark the card bundle byte dirty
+#endif
+
         ret
+
 LEAF_END_MARKED JIT_WriteBarrier_WriteWatch_SVR64, _TEXT
 
 #endif
index 0a5da69..f61b42a 100644 (file)
@@ -94,16 +94,36 @@ LEAF_ENTRY JIT_WriteBarrier_Debug, _TEXT
         // Check if we need to update the card table
         // Calc pCardByte
         shr     rdi, 0x0B
+
         PREPARE_EXTERNAL_VAR g_card_table, r10
-        add     rdi, [r10]
+        mov     r10, [r10]
 
         // Check if this card is dirty
-        cmp     byte ptr [rdi], 0FFh
+        cmp     byte ptr [rdi + r10], 0FFh
+
         jne     UpdateCardTable_Debug
         REPRET
 
     UpdateCardTable_Debug:
+        mov     byte ptr [rdi + r10], 0FFh
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        // Shift rdi by 0x0A more to get the card bundle byte (we shifted by 0x0B already)
+        shr     rdi, 0x0A
+
+        PREPARE_EXTERNAL_VAR g_card_bundle_table, r10
+        add     rdi, [r10] // rdi now addresses the card bundle byte inside the table
+
+        // Check if this bundle byte is dirty
+        cmp     byte ptr [rdi], 0FFh
+
+        jne     UpdateCardBundle_Debug
+        REPRET // bundle byte is already 0xFF, nothing left to write
+
+    UpdateCardBundle_Debug:
         mov     byte ptr [rdi], 0FFh
+#endif
+
         ret
 
     .balign 16
index 53d8f74..6a19e27 100644 (file)
@@ -20,6 +20,7 @@
 extern uint8_t* g_ephemeral_low;
 extern uint8_t* g_ephemeral_high;
 extern uint32_t* g_card_table;
+extern uint32_t* g_card_bundle_table;
 
 // Patch Labels for the various write barriers
 EXTERN_C void JIT_WriteBarrier_End();
@@ -27,17 +28,26 @@ EXTERN_C void JIT_WriteBarrier_End();
 EXTERN_C void JIT_WriteBarrier_PreGrow64(Object **dst, Object *ref);
 EXTERN_C void JIT_WriteBarrier_PreGrow64_Patch_Label_Lower();
 EXTERN_C void JIT_WriteBarrier_PreGrow64_Patch_Label_CardTable();
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+EXTERN_C void JIT_WriteBarrier_PreGrow64_Patch_Label_CardBundleTable();
+#endif
 EXTERN_C void JIT_WriteBarrier_PreGrow64_End();
 
 EXTERN_C void JIT_WriteBarrier_PostGrow64(Object **dst, Object *ref);
 EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_Lower();
 EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_Upper();
 EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_CardTable();
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+EXTERN_C void JIT_WriteBarrier_PostGrow64_Patch_Label_CardBundleTable();
+#endif
 EXTERN_C void JIT_WriteBarrier_PostGrow64_End();
 
 #ifdef FEATURE_SVR_GC
 EXTERN_C void JIT_WriteBarrier_SVR64(Object **dst, Object *ref);
 EXTERN_C void JIT_WriteBarrier_SVR64_PatchLabel_CardTable();
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+EXTERN_C void JIT_WriteBarrier_SVR64_PatchLabel_CardBundleTable();
+#endif
 EXTERN_C void JIT_WriteBarrier_SVR64_End();
 #endif // FEATURE_SVR_GC
 
@@ -46,6 +56,9 @@ EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64(Object **dst, Object *ref);
 EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_WriteWatchTable();
 EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_Lower();
 EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardTable();
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_Patch_Label_CardBundleTable();
+#endif
 EXTERN_C void JIT_WriteBarrier_WriteWatch_PreGrow64_End();
 
 EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64(Object **dst, Object *ref);
@@ -53,20 +66,26 @@ EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_WriteWatchTable
 EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Lower();
 EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_Upper();
 EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardTable();
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_Patch_Label_CardBundleTable();
+#endif
 EXTERN_C void JIT_WriteBarrier_WriteWatch_PostGrow64_End();
 
 #ifdef FEATURE_SVR_GC
 EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64(Object **dst, Object *ref);
 EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_WriteWatchTable();
 EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardTable();
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_PatchLabel_CardBundleTable();
+#endif
 EXTERN_C void JIT_WriteBarrier_WriteWatch_SVR64_End();
 #endif // FEATURE_SVR_GC
 #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
 
 WriteBarrierManager g_WriteBarrierManager;
 
-// Use this somewhat hokey macro to concantonate the function start with the patch 
-// label, this allows the code below to look relatively nice, but relies on the 
+// Use this somewhat hokey macro to concatenate the function start with the patch
+// label. This allows the code below to look relatively nice, but relies on the
 // naming convention which we have established for these helpers.
 #define CALC_PATCH_LOCATION(func,label,offset)      CalculatePatchLocation((PVOID)func, (PVOID)func##_##label, offset)
 
@@ -76,8 +95,9 @@ WriteBarrierManager::WriteBarrierManager() :
     LIMITED_METHOD_CONTRACT;
 }
 
-#ifndef CODECOVERAGE        // Deactivate alignment validation for code coverage builds 
-                            // because the instrumentation tool will not preserve alignmant constraits and we will fail.
+#ifndef CODECOVERAGE        // Deactivate alignment validation for code coverage builds
+                            // because the instrumentation tool will not preserve alignment
+                            // constraints and we will fail.
 
 void WriteBarrierManager::Validate()
 {
@@ -96,21 +116,41 @@ void WriteBarrierManager::Validate()
 
     PBYTE pLowerBoundImmediate, pUpperBoundImmediate, pCardTableImmediate;
 
-    pLowerBoundImmediate  = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_Lower, 2);
-    pCardTableImmediate   = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardTable, 2);
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    PBYTE pCardBundleTableImmediate;
+#endif
+
+    pLowerBoundImmediate      = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_Lower, 2);
+    pCardTableImmediate       = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardTable, 2);
+
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x7) == 0);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
 
-    pLowerBoundImmediate  = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Lower, 2);
-    pUpperBoundImmediate  = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Upper, 2);
-    pCardTableImmediate   = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardTable, 2);
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardBundleTable, 2);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardBundleTableImmediate) & 0x7) == 0);
+#endif
+
+    pLowerBoundImmediate      = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Lower, 2);
+    pUpperBoundImmediate      = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Upper, 2);
+    pCardTableImmediate       = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardTable, 2);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x7) == 0);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pUpperBoundImmediate) & 0x7) == 0);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
 
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardBundleTable, 2);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardBundleTableImmediate) & 0x7) == 0);
+#endif
+
 #ifdef FEATURE_SVR_GC
-    pCardTableImmediate   = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2);
+    pCardTableImmediate        = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    pCardBundleTableImmediate  = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardBundleTable, 2);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardBundleTableImmediate) & 0x7) == 0);
+#endif // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
 #endif // FEATURE_SVR_GC
 
 #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
@@ -119,24 +159,41 @@ void WriteBarrierManager::Validate()
     pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_WriteWatchTable, 2);
     pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_Lower, 2);
     pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardTable, 2);
+
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pWriteWatchTableImmediate) & 0x7) == 0);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x7) == 0);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
 
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardBundleTable, 2);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardBundleTableImmediate) & 0x7) == 0);
+#endif
+
     pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_WriteWatchTable, 2);
     pLowerBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Lower, 2);
     pUpperBoundImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_Upper, 2);
     pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardTable, 2);
+
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pWriteWatchTableImmediate) & 0x7) == 0);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pLowerBoundImmediate) & 0x7) == 0);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pUpperBoundImmediate) & 0x7) == 0);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
 
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardBundleTable, 2);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardBundleTableImmediate) & 0x7) == 0);
+#endif
+
 #ifdef FEATURE_SVR_GC
     pWriteWatchTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_WriteWatchTable, 2);
     pCardTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardTable, 2);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pWriteWatchTableImmediate) & 0x7) == 0);
     _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardTableImmediate) & 0x7) == 0);
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardBundleTable, 2);
+    _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", (reinterpret_cast<UINT64>(pCardBundleTableImmediate) & 0x7) == 0);
+#endif // FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
 #endif // FEATURE_SVR_GC
 #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
 }
@@ -242,36 +299,51 @@ void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier,
     {
         case WRITE_BARRIER_PREGROW64:
         {
-            m_pLowerBoundImmediate  = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_Lower, 2);
-            m_pCardTableImmediate   = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardTable, 2);
+            m_pLowerBoundImmediate      = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_Lower, 2);
+            m_pCardTableImmediate       = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardTable, 2);
 
             // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0).
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate);
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PreGrow64, Patch_Label_CardBundleTable, 2);
+            _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate);
+#endif
             break;
         }
 
         case WRITE_BARRIER_POSTGROW64:
         {
-            m_pLowerBoundImmediate  = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Lower, 2);
-            m_pUpperBoundImmediate  = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Upper, 2);
-            m_pCardTableImmediate   = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardTable, 2);
+            m_pLowerBoundImmediate      = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Lower, 2);
+            m_pUpperBoundImmediate      = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_Upper, 2);
+            m_pCardTableImmediate       = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardTable, 2);
 
             // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0).
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate);
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate);
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_PostGrow64, Patch_Label_CardBundleTable, 2);
+            _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate);
+#endif
             break;
         }
 
 #ifdef FEATURE_SVR_GC
         case WRITE_BARRIER_SVR64:
         {
-            m_pCardTableImmediate   = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2);
+            m_pCardTableImmediate       = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardTable, 2);
 
             // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0).
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
-                        break;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_SVR64, PatchLabel_CardBundleTable, 2);
+            _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate);
+#endif
+            break;
         }
 #endif // FEATURE_SVR_GC
 
@@ -286,6 +358,11 @@ void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier,
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate);
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate);
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PreGrow64, Patch_Label_CardBundleTable, 2);
+            _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate);
+#endif
             break;
         }
 
@@ -301,6 +378,11 @@ void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier,
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pLowerBoundImmediate);
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pUpperBoundImmediate);
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_PostGrow64, Patch_Label_CardBundleTable, 2);
+            _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate);
+#endif
             break;
         }
 
@@ -313,6 +395,11 @@ void WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier,
             // Make sure that we will be bashing the right places (immediates should be hardcoded to 0x0f0f0f0f0f0f0f0f0).
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pWriteWatchTableImmediate);
             _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardTableImmediate);
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            m_pCardBundleTableImmediate = CALC_PATCH_LOCATION(JIT_WriteBarrier_WriteWatch_SVR64, PatchLabel_CardBundleTable, 2);
+            _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/JITinterfaceAMD64.cpp", 0xf0f0f0f0f0f0f0f0 == *(UINT64*)m_pCardBundleTableImmediate);
+#endif
             break;
         }
 #endif // FEATURE_SVR_GC
@@ -504,15 +591,14 @@ void WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended)
 
 void WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
 {
-    // If we are told that we require an upper bounds check (GC did some heap
-    // reshuffling), we need to switch to the WriteBarrier_PostGrow function for
-    // good.
+    // If we are told that we require an upper bounds check (GC did some heap reshuffling),
+    // we need to switch to the WriteBarrier_PostGrow function for good.
 
     WriteBarrierType newType;
     if (NeedDifferentWriteBarrier(bReqUpperBoundsCheck, &newType))
     {
         ChangeWriteBarrierTo(newType, isRuntimeSuspended);
-        return; 
+        return;
     }
 
 #ifdef _DEBUG
@@ -549,6 +635,14 @@ void WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSu
         fFlushCache = true;
     }
 
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    if (*(UINT64*)m_pCardBundleTableImmediate != (size_t)g_card_bundle_table) // repatch only when the table address differs from the stored immediate
+    {
+        *(UINT64*)m_pCardBundleTableImmediate = (size_t)g_card_bundle_table;
+        fFlushCache = true; // barrier code bytes changed, so the instruction cache is flushed below
+    }
+#endif
+
     if (fFlushCache)
     {
         FlushInstructionCache(GetCurrentProcess(), (LPVOID)JIT_WriteBarrier, GetCurrentWriteBarrierSize());
index 61d0642..8b95f77 100644 (file)
@@ -1230,7 +1230,14 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args)
         assert(args->card_table != nullptr);
         assert(args->lowest_address != nullptr);
         assert(args->highest_address != nullptr);
+
         g_card_table = args->card_table;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        assert(args->card_bundle_table != nullptr);
+        g_card_bundle_table = args->card_bundle_table;
+#endif
+
 #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
         if (args->write_watch_table != nullptr)
         {
@@ -1274,7 +1281,14 @@ void GCToEEInterface::StompWriteBarrier(WriteBarrierParameters* args)
         assert(!args->requires_upper_bounds_check && "the ephemeral generation must be at the top of the heap!");
 
         g_card_table = args->card_table;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        assert(g_card_bundle_table == nullptr);
+        g_card_bundle_table = args->card_bundle_table;
+#endif
+
         FlushProcessWriteBuffers();
+        
         g_lowest_address = args->lowest_address;
         VolatileStore(&g_highest_address, args->highest_address);
         ::StompWriteBarrierResize(true, false);
index 91f259d..05e64d4 100644 (file)
@@ -15,6 +15,10 @@ GPTR_IMPL_INIT(uint8_t,  g_highest_address, nullptr);
 uint8_t* g_ephemeral_low  = (uint8_t*)1;
 uint8_t* g_ephemeral_high = (uint8_t*)~0;
 
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+uint32_t* g_card_bundle_table = nullptr; // assigned from WriteBarrierParameters::card_bundle_table in GCToEEInterface::StompWriteBarrier
+#endif
+
 // This is the global GC heap, maintained by the VM.
 GPTR_IMPL(IGCHeap, g_pGCHeap);
 
index e76a211..c7eedc1 100644 (file)
@@ -20,6 +20,7 @@ GPTR_DECL(uint32_t,g_card_table);
 }
 #endif // !DACCESS_COMPILE
 
+extern "C" uint32_t* g_card_bundle_table;
 extern "C" uint8_t* g_ephemeral_low;
 extern "C" uint8_t* g_ephemeral_high;
 
index 30f6dd0..cc68b17 100644 (file)
@@ -1106,15 +1106,31 @@ OBJECTREF AllocateObject(MethodTable *pMT
 
 
 #if defined(_WIN64)
-// Card byte shift is different on 64bit.
-#define card_byte_shift     11
+    static const int card_byte_shift        = 11; // card byte shift is different on 64bit
+    static const int card_bundle_byte_shift = 21; // 0xB + 0xA, the same total shift the assembly barriers apply to reach a bundle byte
 #else
-#define card_byte_shift     10
+    static const int card_byte_shift        = 10;
+    // card_bundle_byte_shift is not defined on this branch, so the feature is rejected at compile time.
+    #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+        #error Manually managed card bundles are currently only implemented for AMD64.
+    #endif
 #endif
 
 #define card_byte(addr) (((size_t)(addr)) >> card_byte_shift)
 #define card_bit(addr)  (1 << ((((size_t)(addr)) >> (card_byte_shift - 3)) & 7))
 
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+#define card_bundle_byte(addr) (((size_t)(addr)) >> card_bundle_byte_shift)
+// Marks the card bundle byte that covers addr as dirty (0xFF) in g_card_bundle_table.
+static void SetCardBundleByte(BYTE* addr)
+{
+    BYTE* cbByte = (BYTE *)VolatileLoadWithoutBarrier(&g_card_bundle_table) + card_bundle_byte(addr);
+    if (*cbByte != 0xFF) // only store when the byte is not already dirty
+    {
+        *cbByte = 0xFF;
+    }
+}
+#endif
 
 #ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
 
@@ -1266,6 +1282,10 @@ extern "C" HCIMPL2_RAW(VOID, JIT_CheckedWriteBarrier, Object **dst, Object *ref)
             CheckedAfterAlreadyDirtyFilter++;
 #endif
             *pCardByte = 0xFF;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            SetCardBundleByte((BYTE*)dst);
+#endif
         }
     }
 }
@@ -1321,6 +1341,11 @@ extern "C" HCIMPL2_RAW(VOID, JIT_WriteBarrier, Object **dst, Object *ref)
             UncheckedAfterAlreadyDirtyFilter++;
 #endif
             *pCardByte = 0xFF;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            SetCardBundleByte((BYTE*)dst);
+#endif
+
         }
     }
 }
@@ -1371,15 +1396,22 @@ void ErectWriteBarrier(OBJECTREF *dst, OBJECTREF ref)
     }
 #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
 
-    if((BYTE*) OBJECTREFToObject(ref) >= g_ephemeral_low && (BYTE*) OBJECTREFToObject(ref) < g_ephemeral_high)
+    if ((BYTE*) OBJECTREFToObject(ref) >= g_ephemeral_low && (BYTE*) OBJECTREFToObject(ref) < g_ephemeral_high)
     {
         // VolatileLoadWithoutBarrier() is used here to prevent fetch of g_card_table from being reordered 
         // with g_lowest/highest_address check above. See comment in code:gc_heap::grow_brick_card_tables.
         BYTE* pCardByte = (BYTE *)VolatileLoadWithoutBarrier(&g_card_table) + card_byte((BYTE *)dst);
-        if(*pCardByte != 0xFF)
+        if (*pCardByte != 0xFF)
+        {
             *pCardByte = 0xFF;
+            
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+            SetCardBundleByte((BYTE*)dst);
+#endif
+
+        }
     }
-}        
+}
 #include <optdefault.h>
 
 void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref)
@@ -1413,6 +1445,11 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref)
             if( !((*pCardByte) & card_bit((BYTE *)dst)) )
             {
                 *pCardByte = 0xFF;
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+                SetCardBundleByte((BYTE*)dst);
+#endif
+
             }
         }
     }
@@ -1449,7 +1486,6 @@ SetCardsAfterBulkCopy(Object **start, size_t len)
         return;
     }
 
-
     // Don't optimize the Generation 0 case if we are checking for write barrier violations
     // since we need to update the shadow heap even in the generation 0 case.
 #if defined (WRITE_BARRIER_CHECK) && !defined (SERVER_GC)
@@ -1493,6 +1529,26 @@ SetCardsAfterBulkCopy(Object **start, size_t len)
         clumpCount--;
     }
     while (clumpCount != 0);
+
+#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
+    size_t startBundleByte = startAddress >> card_bundle_byte_shift;
+    size_t endBundleByte = (endAddress + (1 << card_bundle_byte_shift) - 1) >> card_bundle_byte_shift;
+    size_t bundleByteCount = endBundleByte - startBundleByte;
+
+    uint8_t* pBundleByte = ((uint8_t*)VolatileLoadWithoutBarrier(&g_card_bundle_table)) + startBundleByte;
+
+    do
+    {
+        if (*pBundleByte != 0xFF)
+        {
+            *pBundleByte = 0xFF;
+        }
+
+        pBundleByte++;
+        bundleByteCount--;
+    }
+    while (bundleByteCount != 0);
+#endif
 }
 
 #if defined(_MSC_VER) && defined(_TARGET_X86_)
index 6cc3a05..593a199 100644 (file)
@@ -317,6 +317,7 @@ private:
     PBYTE   m_pWriteWatchTableImmediate;    // PREGROW | POSTGROW | SVR | WRITE_WATCH |
     PBYTE   m_pLowerBoundImmediate;         // PREGROW | POSTGROW |     | WRITE_WATCH |
     PBYTE   m_pCardTableImmediate;          // PREGROW | POSTGROW | SVR | WRITE_WATCH |
+    PBYTE   m_pCardBundleTableImmediate;    // PREGROW | POSTGROW | SVR | WRITE_WATCH |
     PBYTE   m_pUpperBoundImmediate;         //         | POSTGROW |     | WRITE_WATCH |
 };