radv/rt: Refactor exiting PLOC
author	Friedrich Vock <friedrich.vock@gmx.de>
Sun, 18 Dec 2022 19:37:33 +0000 (20:37 +0100)
committer	Marge Bot <emma+marge@anholt.net>
Sun, 18 Dec 2022 21:24:45 +0000 (21:24 +0000)
The previous exit condition, where every workgroup independently checked whether task_count(header) had dropped to 1, was susceptible to sync hazards, causing hangs in Doom Eternal.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7897
Fixes: 271865373 ("radv: Add PLOC shader")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20377>
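For reference, below is a minimal single-threaded C model of the exit handshake the diff introduces. The field names and TASK_INDEX_INVALID mirror radv_global_sync_data, but the helper functions (request_exit_if_done, advance_or_exit, wait_for_phase) and the main() driver are hypothetical illustrations, not the RADV shader code, and they leave out the workgroup barriers and device-scope memory semantics the real shader needs.

/* Illustrative sketch only; field names mirror the shader's
 * radv_global_sync_data, helper names are made up. */
#include <stdatomic.h>
#include <stdio.h>

#define TASK_INDEX_INVALID 0xffffffffu

struct sync_data {
   atomic_uint task_done_counter;
   atomic_uint current_phase_start_counter;
   atomic_uint current_phase_end_counter;
   atomic_uint phase_index;
   atomic_uint next_phase_exit_flag;
};

/* Last task of a pass: if only one node will remain, request an exit
 * instead of another phase (mirrors the ploc_internal.comp hunk). */
static void request_exit_if_done(struct sync_data *s, unsigned next_task_count)
{
   if (next_task_count == 1)
      atomic_store(&s->next_phase_exit_flag, 1);
}

/* Workgroup that closes the current phase: either publish the next phase
 * or mark the phase index invalid so every waiter breaks out of its loop
 * (mirrors the fetch_task hunk). */
static void advance_or_exit(struct sync_data *s, unsigned next_task_count)
{
   if (atomic_load(&s->next_phase_exit_flag)) {
      atomic_store(&s->phase_index, TASK_INDEX_INVALID);
   } else {
      atomic_fetch_add(&s->phase_index, 1);
      atomic_store(&s->current_phase_start_counter,
                   atomic_load(&s->current_phase_end_counter));
      atomic_fetch_add(&s->current_phase_end_counter, next_task_count);
   }
}

/* Any other workgroup: spin until a new phase is published or the exit
 * marker appears. Returns 0 when there is no more work. */
static int wait_for_phase(struct sync_data *s, unsigned my_task_index)
{
   do {
      if (atomic_load(&s->phase_index) == TASK_INDEX_INVALID)
         return 0;
   } while (my_task_index >= atomic_load(&s->current_phase_end_counter));
   return 1;
}

int main(void)
{
   struct sync_data s = {0};
   request_exit_if_done(&s, 1); /* last merge pass leaves a single node */
   advance_or_exit(&s, 1);      /* phase-closing workgroup sees the flag */
   printf("waiter keeps running: %d\n", wait_for_phase(&s, 0));
   return 0;
}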

src/amd/vulkan/bvh/build_helpers.h
src/amd/vulkan/bvh/bvh.h
src/amd/vulkan/bvh/ploc_internal.comp

src/amd/vulkan/bvh/build_helpers.h
index fb5684b..1e57cda 100644
@@ -457,21 +457,28 @@ fetch_task(REF(radv_ir_header) header, bool did_work)
          if (global_task_index == DEREF(header).sync_data.current_phase_end_counter &&
              DEREF(header).sync_data.task_done_counter ==
                 DEREF(header).sync_data.current_phase_end_counter) {
-            atomicAdd(DEREF(header).sync_data.phase_index, 1);
-            DEREF(header).sync_data.current_phase_start_counter =
-               DEREF(header).sync_data.current_phase_end_counter;
-            /* Ensure the changes to the phase index and start/end counter are visible for other
-             * workgroup waiting in the loop. */
-            memoryBarrier(
-               gl_ScopeDevice, gl_StorageSemanticsBuffer,
-               gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
-            atomicAdd(DEREF(header).sync_data.current_phase_end_counter,
-                      DIV_ROUND_UP(task_count(header), gl_WorkGroupSize.x));
+            if (DEREF(header).sync_data.next_phase_exit_flag != 0) {
+               DEREF(header).sync_data.phase_index = TASK_INDEX_INVALID;
+               memoryBarrier(
+                  gl_ScopeDevice, gl_StorageSemanticsBuffer,
+                  gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
+            } else {
+               atomicAdd(DEREF(header).sync_data.phase_index, 1);
+               DEREF(header).sync_data.current_phase_start_counter =
+                  DEREF(header).sync_data.current_phase_end_counter;
+               /* Ensure the changes to the phase index and start/end counter are visible to other
+                * workgroups waiting in the loop. */
+               memoryBarrier(
+                  gl_ScopeDevice, gl_StorageSemanticsBuffer,
+                  gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
+               atomicAdd(DEREF(header).sync_data.current_phase_end_counter,
+                         DIV_ROUND_UP(task_count(header), gl_WorkGroupSize.x));
+            }
             break;
          }
 
          /* If other invocations have finished all nodes, break out; there is no work to do */
-         if (task_count(header) == 1) {
+         if (DEREF(header).sync_data.phase_index == TASK_INDEX_INVALID) {
             break;
          }
       } while (global_task_index >= DEREF(header).sync_data.current_phase_end_counter);
@@ -480,7 +487,7 @@ fetch_task(REF(radv_ir_header) header, bool did_work)
    }
 
    barrier();
-   if (task_count(header) == 1)
+   if (DEREF(header).sync_data.phase_index == TASK_INDEX_INVALID)
       return TASK_INDEX_INVALID;
 
    num_tasks_to_skip = shared_phase_index - phase_index;
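The device-scope memoryBarrier() issued after storing phase_index above is what makes the TASK_INDEX_INVALID marker (and, in the non-exit branch, the updated phase counters) visible to the workgroups still spinning in the wait loop. As a rough C11 analogue of that publish/observe pairing (illustration only; publish_exit and observed_exit are made-up names, and the shader relies on GLSL's AcquireRelease plus MakeAvailable/MakeVisible semantics rather than these exact primitives):

#include <stdatomic.h>

static atomic_uint phase_index;

/* Publisher: the workgroup that closes the phase makes the exit marker
 * visible before the waiters re-check their loop condition. */
void publish_exit(void)
{
   atomic_store_explicit(&phase_index, 0xffffffffu, memory_order_release);
}

/* Waiter: observes the marker with acquire ordering, so everything
 * written before the publish is also visible afterwards. */
int observed_exit(void)
{
   return atomic_load_explicit(&phase_index, memory_order_acquire) == 0xffffffffu;
}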
src/amd/vulkan/bvh/bvh.h
index 93c4e38..96ec036 100644
@@ -149,6 +149,9 @@ struct radv_global_sync_data {
    uint32_t current_phase_start_counter;
    uint32_t current_phase_end_counter;
    uint32_t phase_index;
+   /* If this flag is set, the shader should exit
+    * instead of executing another phase */
+   uint32_t next_phase_exit_flag;
 };
 
 struct radv_ir_header {
src/amd/vulkan/bvh/ploc_internal.comp
index 1b899fa..6bf7496 100644
@@ -250,7 +250,7 @@ main(void)
 
    for (uint iter = 0;; ++iter) {
       uint32_t current_task_count = task_count(args.header);
-      if (current_task_count <= 1)
+      if (task_index == TASK_INDEX_INVALID)
          break;
 
       /* Find preferred partners and merge them */
@@ -409,6 +409,8 @@ main(void)
 
          if (task_index == current_task_count - 1) {
             set_next_task_count(args.header, new_offset);
+            if (new_offset == 1)
+               DEREF(args.header).sync_data.next_phase_exit_flag = 1;
          }
       }
    }
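Taken together, the three hunks appear to implement a single-writer exit sequence: when a merge pass reduces the working set to one node, the last task sets next_phase_exit_flag while publishing the next task count; the workgroup that later closes the phase in fetch_task sees the flag and stores TASK_INDEX_INVALID to phase_index (made visible device-wide) instead of opening another phase; every other workgroup, whether spinning in the wait loop or re-entering fetch_task, then observes the invalid phase index and returns TASK_INDEX_INVALID, so the PLOC main loop terminates through one published value rather than each workgroup racing to re-read task_count.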