#define ACC_INDEX 0
#define ACC_COUNT 6
+/* RA nodes used to track RF registers with implicit writes */
+#define IMPLICIT_RF_COUNT 1
+
#define PHYS_COUNT 64
static uint8_t
static inline uint32_t
temp_to_node(struct v3d_compile *c, uint32_t temp)
{
- return temp + (c->devinfo->has_accumulators ? ACC_COUNT : 0);
+ return temp + (c->devinfo->has_accumulators ? ACC_COUNT :
+ IMPLICIT_RF_COUNT);
}
static inline uint32_t
node_to_temp(struct v3d_compile *c, uint32_t node)
{
assert((c->devinfo->has_accumulators && node >= ACC_COUNT) ||
- (!c->devinfo->has_accumulators && node >= 0));
- return node - (c->devinfo->has_accumulators ? ACC_COUNT : 0);
+ (!c->devinfo->has_accumulators && node >= IMPLICIT_RF_COUNT));
+ return node - (c->devinfo->has_accumulators ? ACC_COUNT :
+ IMPLICIT_RF_COUNT);
}
static inline uint8_t
c->nodes.info = reralloc_array_size(c,
c->nodes.info,
sizeof(c->nodes.info[0]),
- c->nodes.alloc_count + ACC_COUNT);
+ c->nodes.alloc_count +
+ MAX2(ACC_COUNT, IMPLICIT_RF_COUNT));
}
/* Creates the interference node for a new temp. We use this to keep the node
ensure_nodes(c);
int node = ra_add_node(c->g, choose_reg_class(c, class_bits));
- assert(node == temp + ACC_COUNT);
+ assert(c->devinfo->has_accumulators ? node == temp + ACC_COUNT :
+ node == temp + IMPLICIT_RF_COUNT);
/* We fill the node priority after we are done inserting spills */
c->nodes.info[node].class_bits = class_bits;
}
static void
-update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int *acc_nodes,
+update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
+ int *acc_nodes,
+ int *implicit_rf_nodes,
struct qinst *inst)
{
int32_t ip = inst->ip;
}
}
+ /* If any instruction writes to a physical register implicitly
+ * nothing else can write the same register across it.
+ */
+ if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) {
+ for (int i = 0; i < c->num_temps; i++) {
+ if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
+ ra_add_node_interference(c->g,
+ temp_to_node(c, i),
+ implicit_rf_nodes[0]);
+ }
+ }
+ }
+
if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
switch (inst->qpu.alu.add.op) {
case V3D_QPU_A_LDVPMV_IN:
CLASS_BITS_R5);
}
}
+ } else {
+ /* If the instruction has an implicit write
+ * we can't allocate its dest to the same
+ * register.
+ */
+ if (v3d_qpu_writes_rf0_implicitly(c->devinfo, &inst->qpu)) {
+ ra_add_node_interference(c->g,
+ temp_to_node(c, inst->dst.index),
+ implicit_rf_nodes[0]);
+ }
}
}
v3d_register_allocate(struct v3d_compile *c)
{
int acc_nodes[ACC_COUNT];
+ int implicit_rf_nodes[IMPLICIT_RF_COUNT];
+
+ unsigned num_ra_nodes = c->num_temps;
+ if (c->devinfo->has_accumulators)
+ num_ra_nodes += ARRAY_SIZE(acc_nodes);
+ else
+ num_ra_nodes += ARRAY_SIZE(implicit_rf_nodes);
+
c->nodes = (struct v3d_ra_node_info) {
.alloc_count = c->num_temps,
.info = ralloc_array_size(c, sizeof(c->nodes.info[0]),
- c->num_temps + ACC_COUNT),
+ num_ra_nodes),
};
uint32_t phys_index = get_phys_index(c->devinfo);
c->thread_index--;
}
- unsigned num_ra_nodes = c->num_temps;
- if (c->devinfo->has_accumulators)
- num_ra_nodes += ARRAY_SIZE(acc_nodes);
c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
* interfere with when ops have implied r3/r4 writes or for the thread
* switches. We could represent these as classes for the nodes to
* live in, but the classes take up a lot of memory to set up, so we
- * don't want to make too many.
+ * don't want to make too many. We use the same mechanism on platforms
+ * without accumulators that can have implicit writes to phys regs.
*/
for (uint32_t i = 0; i < num_ra_nodes; i++) {
if (c->devinfo->has_accumulators && i < ACC_COUNT) {
ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
c->nodes.info[i].priority = 0;
c->nodes.info[i].class_bits = 0;
+ } else if (!c->devinfo->has_accumulators &&
+ i < ARRAY_SIZE(implicit_rf_nodes)) {
+ implicit_rf_nodes[i] = i;
+ ra_set_node_reg(c->g, implicit_rf_nodes[i], phys_index + i);
+ c->nodes.info[i].priority = 0;
+ c->nodes.info[i].class_bits = 0;
} else {
uint32_t t = node_to_temp(c, i);
c->nodes.info[i].priority =
int ip = 0;
vir_for_each_inst_inorder(inst, c) {
inst->ip = ip++;
- update_graph_and_reg_classes_for_inst(c, acc_nodes, inst);
+ update_graph_and_reg_classes_for_inst(c, acc_nodes,
+ implicit_rf_nodes, inst);
}
/* Set the register classes for all our temporaries in the graph */