#define ACC_INDEX 0
#define ACC_COUNT 6
-#define PHYS_INDEX (ACC_INDEX + ACC_COUNT)
-#define PHYS_COUNT 64
+#define PHYS_COUNT 64
+
+/* Returns the index of the first physical-register-file register in the
+ * RA register set: the RF follows the accumulators when the device has
+ * them, and starts at 0 otherwise.
+ */
+static uint8_t
+get_phys_index(const struct v3d_device_info *devinfo)
+{
+        return devinfo->has_accumulators ? ACC_INDEX + ACC_COUNT : 0;
+}
+
+/* Register class bits: physical register file, accumulators, and r5. */
#define CLASS_BITS_PHYS (1 << 0)
#define CLASS_BITS_ACC (1 << 1)
#define CLASS_BITS_R5 (1 << 4)
}
struct v3d_ra_select_callback_data {
+ /* First physical-register-file index in the RA register set:
+ * 0 on devices without accumulators, ACC_INDEX + ACC_COUNT
+ * otherwise (see get_phys_index()).
+ */
+ uint32_t phys_index;
uint32_t next_acc;
uint32_t next_phys;
struct v3d_ra_node_info *nodes;
+ /* Device info for the callback; presumably used to check
+ * has_accumulators when selecting — confirm against the callback body.
+ */
+ const struct v3d_device_info *devinfo;
};
/* Choosing accumulators improves chances of merging QPU instructions
static const int available_rf_threshold = 5;
int available_rf = 0 ;
for (int i = 0; i < PHYS_COUNT; i++) {
- if (BITSET_TEST(regs, PHYS_INDEX + i))
+ if (BITSET_TEST(regs, v3d_ra->phys_index + i))
available_rf++;
if (available_rf >= available_rf_threshold)
break;
{
for (int i = 0; i < PHYS_COUNT; i++) {
int phys_off = (v3d_ra->next_phys + i) % PHYS_COUNT;
- int phys = PHYS_INDEX + phys_off;
+ int phys = v3d_ra->phys_index + phys_off;
if (BITSET_TEST(regs, phys)) {
v3d_ra->next_phys = phys_off + 1;
* register file can be divided up for fragment shader threading.
*/
int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3);
+ uint8_t phys_index = get_phys_index(compiler->devinfo);
- compiler->regs = ra_alloc_reg_set(compiler, PHYS_INDEX + PHYS_COUNT,
+ compiler->regs = ra_alloc_reg_set(compiler, phys_index + PHYS_COUNT,
false);
if (!compiler->regs)
return false;
compiler->reg_class_phys[threads] =
ra_alloc_contig_reg_class(compiler->regs, 1);
- for (int i = PHYS_INDEX;
- i < PHYS_INDEX + (PHYS_COUNT >> threads); i++) {
+ for (int i = phys_index;
+ i < phys_index + (PHYS_COUNT >> threads); i++) {
ra_class_add_reg(compiler->reg_class_phys_or_acc[threads], i);
ra_class_add_reg(compiler->reg_class_phys[threads], i);
ra_class_add_reg(compiler->reg_class_any[threads], i);
assert(inst->dst.file == QFILE_TEMP);
uint32_t node = temp_to_node(c, inst->dst.index);
ra_set_node_reg(c->g, node,
- PHYS_INDEX + inst->src[0].index);
+ get_phys_index(c->devinfo) +
+ inst->src[0].index);
break;
}
}
c->num_temps + ACC_COUNT),
};
+ uint32_t phys_index = get_phys_index(c->devinfo);
+
struct v3d_ra_select_callback_data callback_data = {
+ .phys_index = phys_index,
.next_acc = 0,
/* Start at RF3, to try to keep the TLB writes from using
* RF0-2.
*/
.next_phys = 3,
.nodes = &c->nodes,
+ .devinfo = c->devinfo,
};
vir_calculate_live_intervals(c);
vir_for_each_inst_inorder(inst, c) {
inst->ip = ip++;
update_graph_and_reg_classes_for_inst(c, acc_nodes, inst);
+
}
/* Set the register classes for all our temporaries in the graph */
temp_registers = calloc(c->num_temps, sizeof(*temp_registers));
for (uint32_t i = 0; i < c->num_temps; i++) {
int ra_reg = ra_get_node_reg(c->g, temp_to_node(c, i));
- if (ra_reg < PHYS_INDEX) {
+ if (ra_reg < phys_index) {
temp_registers[i].magic = true;
temp_registers[i].index = (V3D_QPU_WADDR_R0 +
ra_reg - ACC_INDEX);
} else {
temp_registers[i].magic = false;
- temp_registers[i].index = ra_reg - PHYS_INDEX;
+ temp_registers[i].index = ra_reg - phys_index;
}
}