intel/fs: implement Wa_14017989577
author Tapani Pälli <tapani.palli@intel.com>
Tue, 6 Dec 2022 16:11:10 +0000 (18:11 +0200)
committer Eric Engestrom <eric@engestrom.ch>
Wed, 14 Dec 2022 20:47:01 +0000 (20:47 +0000)
The first instruction of any kernel should have non-zero emask. This
restriction needs to be obeyed to avoid GPU hangs.

This patch adds a function that inserts a dummy mov as the first
instruction to make sure this requirement is fulfilled.

Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20194>
(cherry picked from commit bc4b7de0d0469e296f7ec4626fccdf97926b1c8e)

.pick_status.json
src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs.h

index a9dd9b8..bdfb0f2 100644 (file)
         "description": "intel/fs: implement Wa_14017989577",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null
     },
index 761b79c..db4806b 100644 (file)
@@ -6197,6 +6197,35 @@ needs_dummy_fence(const intel_device_info *devinfo, fs_inst *inst)
    return false;
 }
 
+/* Wa_14017989577
+ *
+ * The first instruction of any kernel must have a non-zero emask; prepend a
+ * dummy mov when needed. Does nothing when devinfo->verx10 is below 120.
+ */
+void
+fs_visitor::emit_dummy_mov_instruction()
+{
+   if (devinfo->verx10 < 120)
+      return;
+
+   struct backend_instruction *first_inst =
+      cfg->first_block()->start();
+
+   /* The WA is unnecessary when the first instruction already has
+    * force_writemask_all set or its exec_size equals dispatch_width.
+    */
+   if (first_inst->force_writemask_all ||
+       first_inst->exec_size == dispatch_width)
+      return;
+
+   /* Put an exec_all, group(8, 0) mov of 0 to null before first_inst. */
+   const fs_builder ubld =
+      bld.at(cfg->first_block(), first_inst).exec_all().group(8, 0);
+   ubld.MOV(bld.null_reg_ud(), brw_imm_ud(0u));
+
+   invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
+}
+
 /* Wa_22013689345
  *
  * We need to emit UGM fence message before EOT, if shader has any UGM write
@@ -6557,6 +6586,10 @@ fs_visitor::run_vs()
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(true /* allow_spilling */);
 
    return !failed;
@@ -6679,6 +6712,10 @@ fs_visitor::run_tcs()
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(true /* allow_spilling */);
 
    return !failed;
@@ -6707,6 +6744,10 @@ fs_visitor::run_tes()
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(true /* allow_spilling */);
 
    return !failed;
@@ -6751,6 +6792,10 @@ fs_visitor::run_gs()
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(true /* allow_spilling */);
 
    return !failed;
@@ -6851,6 +6896,9 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
       fixup_3src_null_dest();
       emit_dummy_memory_fence_before_eot();
 
+      /* Wa_14017989577 */
+      emit_dummy_mov_instruction();
+
       allocate_registers(allow_spilling);
    }
 
@@ -6887,6 +6935,10 @@ fs_visitor::run_cs(bool allow_spilling)
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(allow_spilling);
 
    return !failed;
@@ -6915,6 +6967,10 @@ fs_visitor::run_bs(bool allow_spilling)
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(allow_spilling);
 
    return !failed;
@@ -6944,6 +7000,10 @@ fs_visitor::run_task(bool allow_spilling)
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(allow_spilling);
 
    return !failed;
@@ -6973,6 +7033,10 @@ fs_visitor::run_mesh(bool allow_spilling)
 
    fixup_3src_null_dest();
    emit_dummy_memory_fence_before_eot();
+
+   /* Wa_14017989577 */
+   emit_dummy_mov_instruction();
+
    allocate_registers(allow_spilling);
 
    return !failed;
index 155e563..6b2f593 100644 (file)
@@ -224,6 +224,7 @@ public:
    bool fixup_sends_duplicate_payload();
    void fixup_3src_null_dest();
    void emit_dummy_memory_fence_before_eot();
+   void emit_dummy_mov_instruction();
    bool fixup_nomask_control_flow();
    void assign_curb_setup();
    void assign_urb_setup();