Always fire join events for an rjoin (#651)
authorAndy Hanson <anhans@microsoft.com>
Fri, 7 Feb 2020 22:30:39 +0000 (17:30 -0500)
committerGitHub <noreply@github.com>
Fri, 7 Feb 2020 22:30:39 +0000 (14:30 -0800)
Previously, there was an additional check
`if (!join_struct.wait_done)` at the beginning of an rjoin.

This usually had no effect as the join isn't usually done yet. But if
thread A is the first to enter the join and finishes before thread B
starts, then B will never enter the `if`.

The only effect this had was that B would not fire join events in these
rare cases. (After that we check `join_struct.wait_done` again anyway.)
As of this PR we will always fire the events, which makes them easier
to analyze consistently.

Before this PR, looking at the join events for a single thread would show
no traces of the rjoin happening, except for an extra restart event from
the other thread.

src/coreclr/src/gc/gc.cpp

index 13a8335..abbabb6 100644 (file)
@@ -1022,50 +1022,44 @@ respin:
 
         if (Interlocked::CompareExchange(&join_struct.r_join_lock, 0, join_struct.n_threads) == 0)
         {
-            if (!join_struct.wait_done)
-            {
-                dprintf (JOIN_LOG, ("r_join() Waiting..."));
+            fire_event (gch->heap_number, time_start, type_join, join_id);
 
-                fire_event (gch->heap_number, time_start, type_join, join_id);
+            dprintf (JOIN_LOG, ("r_join() Waiting..."));
 
-                //busy wait around the color
-                if (!join_struct.wait_done)
-                {
+            //busy wait around the color
         respin:
-                    int spin_count = 256 * yp_spin_count_unit;
-                    for (int j = 0; j < spin_count; j++)
-                    {
-                        if (join_struct.wait_done)
-                        {
-                            break;
-                        }
-                        YieldProcessor();           // indicate to the processor that we are spinning
-                    }
-
-                    // we've spun, and if color still hasn't changed, fall into hard wait
-                    if (!join_struct.wait_done)
-                    {
-                        dprintf (JOIN_LOG, ("Join() hard wait on reset event %d", first_thread_arrived));
-                        uint32_t dwJoinWait = join_struct.joined_event[first_thread_arrived].Wait(INFINITE, FALSE);
-                        if (dwJoinWait != WAIT_OBJECT_0)
-                        {
-                            STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %Ix", dwJoinWait);
-                            FATAL_GC_ERROR ();
-                        }
-                    }
-
-                    // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent()
-                    if (!join_struct.wait_done)
-                    {
-                        goto respin;
-                    }
+            int spin_count = 256 * yp_spin_count_unit;
+            for (int j = 0; j < spin_count; j++)
+            {
+                if (join_struct.wait_done)
+                {
+                    break;
+                }
+                YieldProcessor();           // indicate to the processor that we are spinning
+            }
 
-                    dprintf (JOIN_LOG, ("r_join() done"));
+            // we've spun, and if color still hasn't changed, fall into hard wait
+            if (!join_struct.wait_done)
+            {
+                dprintf (JOIN_LOG, ("Join() hard wait on reset event %d", first_thread_arrived));
+                uint32_t dwJoinWait = join_struct.joined_event[first_thread_arrived].Wait(INFINITE, FALSE);
+                if (dwJoinWait != WAIT_OBJECT_0)
+                {
+                    STRESS_LOG1 (LF_GC, LL_FATALERROR, "joined event wait failed with code: %Ix", dwJoinWait);
+                    FATAL_GC_ERROR ();
                 }
+            }
 
-                fire_event (gch->heap_number, time_end, type_join, join_id);
+            // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent()
+            if (!join_struct.wait_done)
+            {
+                goto respin;
             }
 
+            dprintf (JOIN_LOG, ("r_join() done"));
+
+            fire_event (gch->heap_number, time_end, type_join, join_id);
+
             return FALSE;
         }
         else