else
return CLASS_BITS_PHYS;
}
+
+/**
+ * Narrows a register-class bitmask according to the device capabilities.
+ *
+ * If devinfo->has_accumulators is set, class_bits is returned untouched.
+ * Otherwise the result is exactly CLASS_BITS_PHYS; the caller is expected
+ * to have included that bit in the request, which is checked with an
+ * assert.
+ */
+static uint8_t
+filter_class_bits(const struct v3d_device_info *devinfo, uint8_t class_bits)
+{
+        if (devinfo->has_accumulators)
+                return class_bits;
+
+        /* No accumulators on this device: only CLASS_BITS_PHYS is kept,
+         * and the request must have contained it.
+         */
+        assert(class_bits & CLASS_BITS_PHYS);
+        return CLASS_BITS_PHYS;
+}
+
static inline uint32_t
temp_to_node(struct v3d_compile *c, uint32_t temp)
{
*/
if (c->spilling) {
int temp_class = CLASS_BITS_PHYS;
- if (i != c->spill_base.index)
+ if (c->devinfo->has_accumulators &&
+ i != c->spill_base.index) {
temp_class |= CLASS_BITS_ACC;
+ }
add_node(c, i, temp_class);
}
}
* temp will be used immediately so just like the uniform above we
* can allow accumulators.
*/
+ int temp_class =
+ filter_class_bits(c->devinfo, CLASS_BITS_PHYS | CLASS_BITS_ACC);
if (!fill_dst) {
struct qreg dst = vir_TMUWT(c);
assert(dst.file == QFILE_TEMP);
- add_node(c, dst.index, CLASS_BITS_PHYS | CLASS_BITS_ACC);
+ add_node(c, dst.index, temp_class);
} else {
*fill_dst = vir_LDTMU(c);
assert(fill_dst->file == QFILE_TEMP);
- add_node(c, fill_dst->index, CLASS_BITS_PHYS | CLASS_BITS_ACC);
+ add_node(c, fill_dst->index, temp_class);
}
/* Temps across the thread switch we injected can't be assigned to
* instruction immediately after so we
* can use ACC.
*/
- add_node(c, temp.index, CLASS_BITS_PHYS |
- CLASS_BITS_ACC);
+ int temp_class =
+ filter_class_bits(c->devinfo, CLASS_BITS_PHYS |
+ CLASS_BITS_ACC);
+ add_node(c, temp.index, temp_class);
} else {
/* If we have a postponed spill, we
* don't need a fill as the temp would
compiler->reg_class_phys[threads] =
ra_alloc_contig_reg_class(compiler->regs, 1);
+ /* Init physical regs */
for (int i = phys_index;
i < phys_index + (PHYS_COUNT >> threads); i++) {
if (compiler->devinfo->has_accumulators)
ra_class_add_reg(compiler->reg_class_any[threads], i);
}
+ /* Init accumulator regs */
if (compiler->devinfo->has_accumulators) {
for (int i = ACC_INDEX + 0; i < ACC_INDEX + ACC_COUNT - 1; i++) {
ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
ra_class_add_reg(compiler->reg_class_any[threads], i);
}
- }
- /* r5 can only store a single 32-bit value, so not much can
- * use it.
- */
- if (compiler->devinfo->has_accumulators) {
+ /* r5 can only store a single 32-bit value, so not much can
+ * use it.
+ */
ra_class_add_reg(compiler->reg_class_r5[threads],
ACC_INDEX + 5);
ra_class_add_reg(compiler->reg_class_any[threads],
* because ldunif has usually a shorter lifespan, allowing for
* more accumulator reuse and QPU merges.
*/
- if (!inst->qpu.sig.ldunif) {
- uint8_t class_bits =
- get_temp_class_bits(c, inst->dst.index) &
- ~CLASS_BITS_R5;
- set_temp_class_bits(c, inst->dst.index,
- class_bits);
-
- } else {
- /* Until V3D 4.x, we could only load a uniform
- * to r5, so we'll need to spill if uniform
- * loads interfere with each other.
- */
- if (c->devinfo->ver < 40) {
+ if (c->devinfo->has_accumulators) {
+ if (!inst->qpu.sig.ldunif) {
+ uint8_t class_bits =
+ get_temp_class_bits(c, inst->dst.index) &
+ ~CLASS_BITS_R5;
set_temp_class_bits(c, inst->dst.index,
- CLASS_BITS_R5);
+ class_bits);
+
+ } else {
+ /* Until V3D 4.x, we could only load a uniform
+ * to r5, so we'll need to spill if uniform
+ * loads interfere with each other.
+ */
+ if (c->devinfo->ver < 40) {
+ set_temp_class_bits(c, inst->dst.index,
+ CLASS_BITS_R5);
+ }
}
}
}
c->thread_index--;
}
- c->g = ra_alloc_interference_graph(c->compiler->regs,
- c->num_temps + ARRAY_SIZE(acc_nodes));
+ unsigned num_ra_nodes = c->num_temps;
+ if (c->devinfo->has_accumulators)
+ num_ra_nodes += ARRAY_SIZE(acc_nodes);
+ c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
/* Make some fixed nodes for the accumulators, which we will need to
* live in, but the classes take up a lot of memory to set up, so we
* don't want to make too many.
*/
- for (uint32_t i = 0; i < ACC_COUNT + c->num_temps; i++) {
- if (i < ACC_COUNT) {
+ for (uint32_t i = 0; i < num_ra_nodes; i++) {
+ if (c->devinfo->has_accumulators && i < ACC_COUNT) {
acc_nodes[i] = i;
ra_set_node_reg(c->g, acc_nodes[i], ACC_INDEX + i);
c->nodes.info[i].priority = 0;