intel/fs: Represent SWSB in-order dependency addresses as vectors.

author Francisco Jerez <currojerez@riseup.net>

Fri, 19 Feb 2021 07:26:57 +0000 (23:26 -0800)

committer Marge Bot <eric+marge@anholt.net>

Fri, 16 Apr 2021 08:27:35 +0000 (08:27 +0000)
author Francisco Jerez <currojerez@riseup.net>
Fri, 19 Feb 2021 07:26:57 +0000 (23:26 -0800)
committer Marge Bot <eric+marge@anholt.net>
Fri, 16 Apr 2021 08:27:35 +0000 (08:27 +0000)
diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp

index 3b1f6de..f3a6426 100644 (file)
--- a/src/intel/compiler/brw_fs_scoreboard.cpp
+++ b/src/intel/compiler/brw_fs_scoreboard.cpp
@@ -130,13 +130,21 @@ namespace {
     }
  
     /**
-    * Number of in-order hardware instructions contained in this IR
-    * instruction.  This determines the increment applied to the RegDist
-    * counter calculated for any ordered dependency that crosses this
+    * Index of the \p p pipeline counter in the ordered_address vector defined
+    * below.
+    */
+#define IDX(p) (p >= TGL_PIPE_FLOAT ? unsigned(p - TGL_PIPE_FLOAT) :    \
+                (abort(), ~0u))
+
+   /**
+    * Number of in-order hardware instructions for pipeline index \p contained
+    * in this IR instruction.  This determines the increment applied to the
+    * RegDist counter calculated for any ordered dependency that crosses this
      * instruction.
      */
     unsigned
-   ordered_unit(const fs_inst *inst)
+   ordered_unit(const struct gen_device_info *devinfo, const fs_inst *inst,
+                unsigned p)
     {
        switch (inst->opcode) {
        case BRW_OPCODE_SYNC:
@@ -156,7 +164,11 @@ namespace {
            * (again) don't use virtual instructions if you want optimal
            * scheduling.
            */
-         return is_unordered(inst) ? 0 : 1;
+         if (!is_unordered(inst) && (p == IDX(inferred_exec_pipe(devinfo, inst)) ||
+                                     p == IDX(TGL_PIPE_ALL)))
+            return 1;
+         else
+            return 0;
        }
     }
  
@@ -164,8 +176,36 @@ namespace {
      * Type for an instruction counter that increments for in-order
      * instructions only, arbitrarily denoted 'jp' throughout this lowering
      * pass in order to distinguish it from the regular instruction counter.
+    * This is represented as a vector with an independent counter for each
+    * asynchronous ALU pipeline in the EU.
      */
-   typedef int ordered_address;
+   struct ordered_address {
+      /**
+       * Construct the ordered address of a dependency known to execute on a
+       * single specified pipeline \p p (unless TGL_PIPE_NONE or TGL_PIPE_ALL
+       * is provided), in which case the vector counter will be initialized
+       * with all components equal to INT_MIN (always satisfied) except for
+       * component IDX(p).
+       */
+      ordered_address(tgl_pipe p = TGL_PIPE_NONE, int jp0 = INT_MIN) {
+         for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++)
+            jp[q] = (p == TGL_PIPE_NONE || (IDX(p) != q && p != TGL_PIPE_ALL) ?
+                     INT_MIN : jp0);
+      }
+
+      int jp[IDX(TGL_PIPE_ALL)];
+
+      friend bool
+      operator==(const ordered_address &jp0, const ordered_address &jp1)
+      {
+         for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++) {
+            if (jp0.jp[p] != jp1.jp[p])
+               return false;
+         }
+
+         return true;
+      }
+   };
  
     /**
      * Return the number of instructions in the program.
@@ -184,12 +224,13 @@ namespace {
     ordered_inst_addresses(const fs_visitor *shader)
     {
        ordered_address *jps = new ordered_address[num_instructions(shader)];
-      ordered_address jp = 0;
+      ordered_address jp(TGL_PIPE_ALL, 0);
        unsigned ip = 0;
  
        foreach_block_and_inst(block, fs_inst, inst, shader->cfg) {
           jps[ip] = jp;
-         jp += ordered_unit(inst);
+         for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
+            jp.jp[p] += ordered_unit(shader->devinfo, inst, p);
           ip++;
        }
  
@@ -347,7 +388,7 @@ namespace {
        /**
         * No dependency information.
         */
-      dependency() : ordered(TGL_REGDIST_NULL), jp(INT_MIN),
+      dependency() : ordered(TGL_REGDIST_NULL), jp(),
                       unordered(TGL_SBID_NULL), id(0),
                       exec_all(false) {}
  
@@ -355,7 +396,8 @@ namespace {
         * Construct a dependency on the in-order instruction with the provided
         * ordered_address instruction counter.
         */
-      dependency(tgl_regdist_mode mode, ordered_address jp, bool exec_all) :
+      dependency(tgl_regdist_mode mode, const ordered_address &jp,
+                 bool exec_all) :
           ordered(mode), jp(jp), unordered(TGL_SBID_NULL), id(0),
           exec_all(exec_all) {}
  
@@ -364,7 +406,7 @@ namespace {
         * specified synchronization token.
         */
        dependency(tgl_sbid_mode mode, unsigned id, bool exec_all) :
-         ordered(TGL_REGDIST_NULL), jp(INT_MIN), unordered(mode), id(id),
+         ordered(TGL_REGDIST_NULL), jp(), unordered(mode), id(id),
           exec_all(exec_all) {}
  
        /**
@@ -432,7 +474,8 @@ namespace {
        }
     };
  
-   const dependency dependency::done = dependency(TGL_REGDIST_SRC, INT_MIN, false);
+   const dependency dependency::done =
+        dependency(TGL_REGDIST_SRC, ordered_address(), false);
  
     /**
      * Return whether \p dep contains any dependency information.
@@ -458,7 +501,8 @@ namespace {
  
        if (dep0.ordered || dep1.ordered) {
           dep.ordered = dep0.ordered | dep1.ordered;
-         dep.jp = MAX2(dep0.jp, dep1.jp);
+         for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
+            dep.jp.jp[p] = MAX2(dep0.jp.jp[p], dep1.jp.jp[p]);
        }
  
        if (dep0.unordered || dep1.unordered) {
@@ -492,10 +536,14 @@ namespace {
      * the end of the origin block.
      */
     dependency
-   transport(dependency dep, int delta)
+   transport(dependency dep, int delta[IDX(TGL_PIPE_ALL)])
     {
-      if (dep.ordered && dep.jp > INT_MIN)
-         dep.jp += delta;
+      if (dep.ordered) {
+         for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++) {
+            if (dep.jp.jp[p] > INT_MIN)
+               dep.jp.jp[p] += delta[p];
+         }
+      }
  
        return dep;
     }
@@ -599,7 +647,7 @@ namespace {
         * object.  \sa transport().
         */
        friend scoreboard
-      transport(const scoreboard &sb0, int delta)
+      transport(const scoreboard &sb0, int delta[IDX(TGL_PIPE_ALL)])
        {
           scoreboard sb;
  
@@ -736,7 +784,9 @@ namespace {
                 continue;
  
              if (dep.ordered && deps[i].ordered) {
-               deps[i].jp = MAX2(deps[i].jp, dep.jp);
+               for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
+                  deps[i].jp.jp[p] = MAX2(deps[i].jp.jp[p], dep.jp.jp[p]);
+
                 deps[i].ordered |= dep.ordered;
                 deps[i].exec_all |= dep.exec_all;
                 dep.ordered = TGL_REGDIST_NULL;
@@ -770,11 +820,13 @@ namespace {
  
        for (unsigned i = 0; i < deps.size(); i++) {
           if (deps[i].ordered && exec_all >= deps[i].exec_all) {
-            const unsigned dist = jp - deps[i].jp;
-            const unsigned max_dist = 10;
-            assert(jp > deps[i].jp);
-            if (dist <= max_dist)
-               min_dist = MIN3(min_dist, dist, 7);
+            for (unsigned q = 0; q < IDX(TGL_PIPE_ALL); q++) {
+               const unsigned dist = jp.jp[q] - int64_t(deps[i].jp.jp[q]);
+               const unsigned max_dist = (q == IDX(TGL_PIPE_LONG) ? 14 : 10);
+               assert(jp.jp[q] > deps[i].jp.jp[q]);
+               if (dist <= max_dist)
+                  min_dist = MIN3(min_dist, dist, 7);
+            }
           }
        }
  
@@ -861,6 +913,10 @@ namespace {
                            const fs_inst *inst, unsigned ip, scoreboard &sb)
     {
        const bool exec_all = inst->force_writemask_all;
+      const struct gen_device_info *devinfo = shader->devinfo;
+      const tgl_pipe p = inferred_exec_pipe(devinfo, inst);
+      const ordered_address jp = p ? ordered_address(p, jps[ip].jp[IDX(p)]) :
+                                     ordered_address();
  
        /* Track any source registers that may be fetched asynchronously by this
         * instruction, otherwise clear the dependency in order to avoid
@@ -870,7 +926,8 @@ namespace {
           const dependency rd_dep =
              (inst->is_payload(i) ||
               inst->is_math()) ? dependency(TGL_SBID_SRC, ip, exec_all) :
-            ordered_unit(inst) ? dependency(TGL_REGDIST_SRC, jps[ip], exec_all) :
+            ordered_unit(devinfo, inst, IDX(TGL_PIPE_ALL)) ?
+               dependency(TGL_REGDIST_SRC, jp, exec_all) :
              dependency::done;
  
           for (unsigned j = 0; j < regs_read(inst, i); j++)
@@ -887,7 +944,8 @@ namespace {
        /* Track any destination registers of this instruction. */
        const dependency wr_dep =
           is_unordered(inst) ? dependency(TGL_SBID_DST, ip, exec_all) :
-         ordered_unit(inst) ? dependency(TGL_REGDIST_DST, jps[ip], exec_all) :
+         ordered_unit(devinfo, inst, IDX(TGL_PIPE_ALL)) ?
+            dependency(TGL_REGDIST_DST, jp, exec_all) :
           dependency();
  
        if (is_valid(wr_dep) && inst->dst.file != BAD_FILE &&
@@ -942,9 +1000,13 @@ namespace {
                 foreach_list_typed(bblock_link, child_link, link,
                                    &block->children) {
                    scoreboard &in_sb = in_sbs[child_link->block->num];
-                  const int delta =
-                     jps[child_link->block->start_ip] - jps[block->end_ip]
-                     - ordered_unit(static_cast<const fs_inst *>(block->end()));
+                  int delta[IDX(TGL_PIPE_ALL)];
+
+                  for (unsigned p = 0; p < IDX(TGL_PIPE_ALL); p++)
+                     delta[p] = jps[child_link->block->start_ip].jp[p]
+                        - jps[block->end_ip].jp[p]
+                        - ordered_unit(shader->devinfo,
+                                       static_cast<const fs_inst *>(block->end()), p);
  
                    in_sb = merge(eq, in_sb, transport(sb, delta));
                 }
author	Francisco Jerez <currojerez@riseup.net>
	Fri, 19 Feb 2021 07:26:57 +0000 (23:26 -0800)
committer	Marge Bot <eric+marge@anholt.net>
	Fri, 16 Apr 2021 08:27:35 +0000 (08:27 +0000)