intel/fs: Add SWSB dependency annotations for cross-pipeline WaR data hazards on...
author Francisco Jerez <currojerez@riseup.net>
Wed, 26 May 2021 23:50:40 +0000 (16:50 -0700)
committer Marge Bot <eric+marge@anholt.net>
Wed, 23 Jun 2021 07:34:22 +0000 (07:34 +0000)
In cases where an in-order instruction is overwriting a register
previously read by another in-order instruction, drop the dependency
iff the previous read is guaranteed to have occurred from the same
in-order pipeline.  This should only have an effect on XeHP+ since
previous Xe platforms only had one in-order FPU pipeline.

The previous workaround we were using for this treated all ordered
read dependencies as write dependencies to avoid noise from our
simulation environment.  Relative to our previous workaround this
improves performance of GFXBench5 gl_tess by ~7% on a DG2 system
among other single-digit percentage FPS improvements.

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11433>

src/intel/compiler/brw_fs_scoreboard.cpp

index 8702e12..4176f54 100644 (file)
@@ -579,11 +579,16 @@ namespace {
    /**
     * Return simplified dependency removing any synchronization modes not
     * applicable to an instruction \p inst writing the same register location.
+    *
+    * This clears any WaR dependency for writes performed from the same
+    * pipeline as the read, since there is no possibility for a data hazard.
     */
    dependency
-   dependency_for_write(const fs_inst *inst, dependency dep)
+   dependency_for_write(const struct intel_device_info *devinfo,
+                        const fs_inst *inst, dependency dep)
    {
-      if (!is_unordered(inst))
+      if (!is_unordered(inst) &&
+          is_single_pipe(dep.jp, inferred_exec_pipe(devinfo, inst)))
          dep.ordered &= TGL_REGDIST_DST;
       return dep;
    }
@@ -1124,7 +1129,7 @@ namespace {
             if (inst->dst.file != BAD_FILE && !inst->dst.is_null() &&
                 !inst->dst.is_accumulator()) {
                for (unsigned j = 0; j < regs_written(inst); j++) {
-                  add_dependency(ids, deps[ip], dependency_for_write(inst,
+                  add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst,
                      sb.get(byte_offset(inst->dst, REG_SIZE * j))));
                }
             }
@@ -1144,7 +1149,7 @@ namespace {
 
             if (is_send(inst) && inst->base_mrf != -1) {
                for (unsigned j = 0; j < inst->implied_mrf_writes(); j++)
-                  add_dependency(ids, deps[ip], dependency_for_write(inst,
+                  add_dependency(ids, deps[ip], dependency_for_write(devinfo, inst,
                      sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0))));
             }
          }