/* legalize: */
bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary);
+bool ir3_legalize_relative(struct ir3 *ir);
static inline bool
ir3_has_latency_to_hide(struct ir3 *ir)
IR3_PASS(ir, ir3_postsched, so);
+ IR3_PASS(ir, ir3_legalize_relative);
IR3_PASS(ir, ir3_lower_subgroups);
if (so->type == MESA_SHADER_FRAGMENT)
* 1) Iteratively determine where sync ((sy)/(ss)) flags are needed,
* based on state flowing out of predecessor blocks until there is
* no further change. In some cases this requires inserting nops.
- * 2) Mark (ei) on last varying input, and (ul) on last use of a0.x
+ * 2) Mark (ei) on last varying input
* 3) Final nop scheduling for instruction latency
* 4) Resolve jumps and schedule blocks, marking potential convergence
* points with (jp)
if (bd->valid)
return false;
- struct ir3_instruction *last_rel = NULL;
struct ir3_instruction *last_n = NULL;
struct list_head instr_list;
struct ir3_legalize_state prev_state = bd->state;
regmask_init(&state->needs_sy, mergedregs);
}
}
-
- /* TODO: is it valid to have address reg loaded from a
- * relative src (ie. mova a0, c<a0.x+4>)? If so, the
- * last_rel check below should be moved ahead of this:
- */
- if (reg->flags & IR3_REG_RELATIV)
- last_rel = n;
}
foreach_dst (reg, n) {
regmask_init(&state->needs_ss_war, mergedregs);
regmask_init(&state->needs_ss, mergedregs);
}
-
- if (last_rel && (reg->num == regid(REG_A0, 0))) {
- last_rel->flags |= IR3_INSTR_UL;
- last_rel = NULL;
- }
}
/* cat5+ does not have an (ss) bit, if needed we need to
list_add(&baryf->node, &block->instr_list);
}
- if (last_rel)
- last_rel->flags |= IR3_INSTR_UL;
-
bd->valid = true;
if (memcmp(&prev_state, state, sizeof(*state))) {
--- /dev/null
+/*
+ * Copyright 2022 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "ir3.h"
+
+/*
+ * Mark (ul) on the last user of a0 before a0 is loaded again.  emit_block
+ * makes sure a0 is loaded first if there is any user in the block, so no
+ * a0 value is carried across a block boundary.  This allows us to process
+ * each block independently.
+ *
+ * Note that this pass must be called before any pass that can break that
+ * per-block assumption, such as ir3_lower_subgroups.
+ */
+
+/*
+ * Return true if any register operand of instr (source or destination)
+ * has the IR3_REG_RELATIV flag set, false otherwise.
+ */
+static bool
+is_reg_relative(const struct ir3_instruction *instr)
+{
+   foreach_src (src, instr) {
+      if (src->flags & IR3_REG_RELATIV)
+         return true;
+   }
+
+   foreach_dst (dst, instr) {
+      if (dst->flags & IR3_REG_RELATIV)
+         return true;
+   }
+
+   return false;
+}
+
+/*
+ * Return true if instr has a destination register whose number equals
+ * regid(REG_A0, 0), false otherwise.
+ */
+static bool
+is_dst_a0(const struct ir3_instruction *instr)
+{
+   foreach_dst (dst, instr) {
+      if (regid(REG_A0, 0) == dst->num)
+         return true;
+   }
+
+   return false;
+}
+
+/*
+ * Walk every block of ir and set IR3_INSTR_UL on the last instruction with
+ * a relative register operand that precedes each write of a0, and on the
+ * last such instruction remaining at the end of the block.
+ *
+ * Always returns true.
+ */
+bool
+ir3_legalize_relative(struct ir3 *ir)
+{
+   foreach_block (block, &ir->block_list) {
+      /* Most recent relative-register instruction not yet flagged (ul). */
+      struct ir3_instruction *pending = NULL;
+
+      foreach_instr (instr, &block->instr_list) {
+         if (is_reg_relative(instr))
+            pending = instr;
+
+         /* Open question: is it valid to have the address reg loaded from
+          * a relative src (ie. mova a0, c<a0.x+4>)?  Because pending is
+          * updated before this check, such a load is itself marked (ul),
+          * which may or may not be valid.
+          */
+         if (!pending || !is_dst_a0(instr))
+            continue;
+
+         pending->flags |= IR3_INSTR_UL;
+         pending = NULL;
+      }
+
+      /* No write of a0 followed the last user within this block. */
+      if (pending)
+         pending->flags |= IR3_INSTR_UL;
+   }
+
+   return true;
+}
'ir3_image.h',
'ir3.h',
'ir3_legalize.c',
+ 'ir3_legalize_relative.c',
'ir3_liveness.c',
'ir3_lower_parallelcopy.c',
'ir3_lower_spill.c',