(*regs)[i] = c->undef;
}
+static void
+ntq_emit_thrsw(struct vc4_compile *c)
+{
+        if (!c->fs_threaded)
+                return;
+
+        /* Always thread switch after each texture operation for now.
+         *
+         * We could do better by batching a bunch of texture fetches up and
+         * then doing one thread switch and collecting all their results
+         * afterward.
+         */
+        qir_emit_nondef(c, qir_inst(QOP_THRSW, c->undef,
+                                    c->undef, c->undef));
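+        /* Descriptive note (not in the original patch): c->execute is only
+         * live while emitting NIR control flow, so this records whether the
+         * switch we just emitted was in unconditionally-executed, top-level
+         * code.
+         */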
+        c->last_thrsw_at_top_level = (c->execute.file == QFILE_NULL);
+}
+
static struct qreg
indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
{
        qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
        c->num_texture_samples++;
+
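+        /* Descriptive note (not in the original patch): the lookup is now
+         * outstanding in the TMU, so switching lets the other fragment
+         * thread run while it completes; the value is collected below with
+         * TEX_RESULT.
+         */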
+        ntq_emit_thrsw(c);
+
        return qir_TEX_RESULT(c);
}
        qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
+        ntq_emit_thrsw(c);
+
        struct qreg tex = qir_TEX_RESULT(c);
        c->num_texture_samples++;
        qir_TEX_S(c, s, texture_u[next_texture_u++]);
        c->num_texture_samples++;
+
+        ntq_emit_thrsw(c);
+
        struct qreg tex = qir_TEX_RESULT(c);
        enum pipe_format format = c->key->tex[unit].format;
                add_write_dep(dir, &state->last_tex_result, n);
                break;
+        case QOP_THRSW:
+                /* After a new THRSW, all texture samples queued since the
+                 * previous THRSW (or program start) must be collected. For
+                 * now, our input has one THRSW in between each texture setup
+                 * and its results collection, and we just make sure that
+                 * ordering is maintained.
+                 */
+                add_write_dep(dir, &state->last_tex_coord, n);
+                add_write_dep(dir, &state->last_tex_result, n);
+
+                /* Accumulators and flags are lost across thread switches. */
+                add_write_dep(dir, &state->last_sf, n);
+
+                /* Setup, like the varyings, needs to be drained before we
+                 * thread switch.
+                 */
+                add_write_dep(dir, &state->last_vary_read, n);
+
+                /* The TLB-locking operations have to stay after the last
+                 * thread switch.
+                 */
+                add_write_dep(dir, &state->last_tlb, n);
+                break;
+
        case QOP_TLB_COLOR_READ:
        case QOP_MS_MASK:
                add_write_dep(dir, &state->last_tlb, n);