intel/fs: Allow FS_OPCODE_SCHEDULING_FENCE stall on registers
author Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Fri, 17 Jan 2020 22:52:13 +0000 (14:52 -0800)
committer Marge Bot <eric+marge@anholt.net>
Wed, 29 Apr 2020 07:17:27 +0000 (07:17 +0000)
It will generate the MOVs (or a SYNC_NOP in Gen12+) needed for the stall.

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3278>

src/intel/compiler/brw_eu_defines.h
src/intel/compiler/brw_fs_generator.cpp

index d27205d..146d3b3 100644 (file)
@@ -465,6 +465,9 @@ enum opcode {
 
    /**
     * Scheduling-only fence.
+    *
+    * Sources can be used to force a stall until the registers in those are
+    * available.  This might generate MOVs or SYNC_NOPs (Gen12+).
     */
    FS_OPCODE_SCHEDULING_FENCE,
 
index fa2abd4..b055110 100644 (file)
@@ -2228,8 +2228,33 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
       }
 
       case FS_OPCODE_SCHEDULING_FENCE:
-         if (unlikely(debug_flag))
-            disasm_info->use_tail = true;
+         if (inst->sources == 0 && inst->sched.regdist == 0 &&
+                                   inst->sched.mode == TGL_SBID_NULL) {
+            if (unlikely(debug_flag))
+               disasm_info->use_tail = true;
+            break;
+         }
+
+         if (devinfo->gen >= 12) {
+            /* Use the available SWSB information to stall.  A single SYNC is
+             * sufficient since if there were multiple dependencies, the
+             * scoreboard algorithm already injected other SYNCs before this
+             * instruction.
+             */
+            brw_SYNC(p, TGL_SYNC_NOP);
+         } else {
+            for (unsigned i = 0; i < inst->sources; i++) {
+               /* Emit a MOV to force a stall until the instruction producing the
+                * registers finishes.
+                */
+               brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW),
+                       retype(src[i], BRW_REGISTER_TYPE_UW));
+            }
+
+            if (inst->sources > 1)
+               multiple_instructions_emitted = true;
+         }
+
          break;
 
       case SHADER_OPCODE_INTERLOCK: