aco: Add Primitive Ordered Pixel Shading scheduling rules
author Vitaliy Triang3l Kuzmin <triang3l@yandex.ru>
Mon, 3 Apr 2023 18:27:47 +0000 (21:27 +0300)
committer Marge Bot <emma+marge@anholt.net>
Mon, 26 Jun 2023 15:58:04 +0000 (15:58 +0000)
Implement the acquire/release semantics of the fragment shader interlock
ordered section in Vulkan, and prevent memory accesses that require
primitive ordering from being reordered out of the ordered section.

Also, the ordered section should be as short as possible, so the
instructions awaiting overlapped waves are not reordered upwards, and the
exit from the ordered section is not reordered downwards.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Signed-off-by: Vitaliy Triang3l Kuzmin <triang3l@yandex.ru>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22250>

src/amd/compiler/aco_ir.cpp
src/amd/compiler/aco_scheduler.cpp

index ea5e1f2..5119890 100644 (file)
@@ -223,6 +223,17 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
 memory_sync_info
 get_sync_info(const Instruction* instr)
 {
+   /* Primitive Ordered Pixel Shading barriers necessary for accesses to memory shared between
+    * overlapping waves in the queue family.
+    */
+   if (instr->opcode == aco_opcode::p_pops_gfx9_overlapped_wave_wait_done ||
+       (instr->opcode == aco_opcode::s_wait_event &&
+        !(instr->sopp().imm & wait_event_imm_dont_wait_export_ready))) {
+      return memory_sync_info(storage_buffer | storage_image, semantic_acquire, scope_queuefamily);
+   } else if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) {
+      return memory_sync_info(storage_buffer | storage_image, semantic_release, scope_queuefamily);
+   }
+
    switch (instr->format) {
    case Format::SMEM: return instr->smem().sync;
    case Format::MUBUF: return instr->mubuf().sync;
index 2e43fa2..d0cd09d 100644 (file)
@@ -571,6 +571,21 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
    if (!upwards && instr->opcode == aco_opcode::p_exit_early_if)
       return hazard_fail_unreorderable;
 
+   /* In Primitive Ordered Pixel Shading, await overlapped waves as late as possible, and notify
+    * overlapping waves that they can continue execution as early as possible.
+    */
+   if (upwards) {
+      if (instr->opcode == aco_opcode::p_pops_gfx9_add_exiting_wave_id ||
+          (instr->opcode == aco_opcode::s_wait_event &&
+           !(instr->sopp().imm & wait_event_imm_dont_wait_export_ready))) {
+         return hazard_fail_unreorderable;
+      }
+   } else {
+      if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) {
+         return hazard_fail_unreorderable;
+      }
+   }
+
    if (query->uses_exec || query->writes_exec) {
       for (const Definition& def : instr->definitions) {
          if (def.isFixed() && def.physReg() == exec)
@@ -580,7 +595,13 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
    if (query->writes_exec && needs_exec_mask(instr))
       return hazard_fail_exec;
 
-   /* don't move exports so that they stay closer together */
+   /* Don't move exports so that they stay closer together.
+    * Also, with Primitive Ordered Pixel Shading on GFX11+, the `done` export must not be moved
+    * above the memory accesses before the queue family scope (more precisely, fragment interlock
+    * scope, but it's not available in ACO) release barrier that is expected to be inserted before
+    * the export, as well as before any `s_wait_event export_ready` which enters the ordered
+    * section, because the `done` export exits the ordered section.
+    */
    if (instr->isEXP())
       return hazard_fail_export;