vc4: Add THRSW nodes after each tex sample setup in multithreaded mode.
author Eric Anholt <eric@anholt.net>
Fri, 11 Nov 2016 01:16:04 +0000 (17:16 -0800)
committer Eric Anholt <eric@anholt.net>
Sun, 13 Nov 2016 03:21:46 +0000 (19:21 -0800)
This is a suboptimal implementation, but Jonas Pfeil found that it was
still a massive performance gain.
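
For reference, the QIR for a single texture fetch in a threaded fragment shader ends up ordered roughly like this (a sketch of the ordering implied by the hunks below, not literal compiler output):

        tex_s/tex_t/...    texture coordinate setup writes
        thrsw              switch to the other thread while the fetch is outstanding
        tex_result         collect the sample once this thread resumes

Batching several texture setups behind a single thrsw, as the comment in the new ntq_emit_thrsw() helper notes, is left as a future improvement.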

src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir_schedule.c

diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index abd20a5..ad06d85 100644
@@ -65,6 +65,23 @@ resize_qreg_array(struct vc4_compile *c,
                 (*regs)[i] = c->undef;
 }
 
+static void
+ntq_emit_thrsw(struct vc4_compile *c)
+{
+        if (!c->fs_threaded)
+                return;
+
+        /* Always thread switch after each texture operation for now.
+         *
+         * We could do better by batching a bunch of texture fetches up and
+         * then doing one thread switch and collecting all their results
+         * afterward.
+         */
+        qir_emit_nondef(c, qir_inst(QOP_THRSW, c->undef,
+                                    c->undef, c->undef));
+        c->last_thrsw_at_top_level = (c->execute.file == QFILE_NULL);
+}
+
 static struct qreg
 indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
 {
@@ -105,6 +122,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
 
         qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
         c->num_texture_samples++;
+
+        ntq_emit_thrsw(c);
+
         return qir_TEX_RESULT(c);
 }
 
@@ -363,6 +383,8 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
 
         qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
 
+        ntq_emit_thrsw(c);
+
         struct qreg tex = qir_TEX_RESULT(c);
         c->num_texture_samples++;
 
@@ -483,6 +505,9 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
         qir_TEX_S(c, s, texture_u[next_texture_u++]);
 
         c->num_texture_samples++;
+
+        ntq_emit_thrsw(c);
+
         struct qreg tex = qir_TEX_RESULT(c);
 
         enum pipe_format format = c->key->tex[unit].format;
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
index 6b7efdb..4f17733 100644
@@ -229,6 +229,30 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
                 add_write_dep(dir, &state->last_tex_result, n);
                 break;
 
+        case QOP_THRSW:
+                /* After a new THRSW, one must collect all texture samples
+                 * queued since the previous THRSW/program start.  For now, we
+                 * have one THRSW in between each texture setup and its
+                 * results collection as our input, and we just make sure that
+                 * that ordering is maintained.
+                 */
+                add_write_dep(dir, &state->last_tex_coord, n);
+                add_write_dep(dir, &state->last_tex_result, n);
+
+                /* accumulators and flags are lost across thread switches. */
+                add_write_dep(dir, &state->last_sf, n);
+
+                /* Setup, like the varyings, will need to be drained before we
+                 * thread switch.
+                 */
+                add_write_dep(dir, &state->last_vary_read, n);
+
+                /* The TLB-locking operations have to stay after the last
+                 * thread switch.
+                 */
+                add_write_dep(dir, &state->last_tlb, n);
+                break;
+
         case QOP_TLB_COLOR_READ:
         case QOP_MS_MASK:
                 add_write_dep(dir, &state->last_tlb, n);
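
A rough illustration of the hazard the last_sf dependency guards against (QIR-ish pseudocode, not part of the patch): without it, the scheduler could let a flag write and its conditional consumer straddle the switch,

        sf = mov a          set the condition flags
        thrsw               the other thread runs; flags are clobbered
        mov.zs dst, src     would now test whatever flags that thread left behind

so making QOP_THRSW act as a writer of last_sf keeps each flag-set/flag-use pair on one side of the thread switch.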