}
calc_min_waves(program);
- program->vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
- program->sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
unsigned scratch_size = 0;
if (program->stage == gs_copy_vs) {
program->physical_sgprs = 800;
program->sgpr_alloc_granule = 16;
program->sgpr_limit = 102;
- if (family == CHIP_TONGA || family == CHIP_ICELAND) {
- program->sgpr_alloc_granule = 96;
- program->sgpr_limit = 94; /* workaround hardware bug */
- }
+ if (family == CHIP_TONGA || family == CHIP_ICELAND)
+ program->sgpr_alloc_granule = 96; /* workaround hardware bug */
} else {
program->physical_sgprs = 512;
program->sgpr_alloc_granule = 8;
return align(std::max(addressable_vgprs, granule), granule);
}
-uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves)
+unsigned round_down(unsigned a, unsigned b)
{
- uint16_t sgprs = (program->physical_sgprs / max_waves) - program->sgpr_alloc_granule + 1;
- sgprs = get_sgpr_alloc(program, sgprs);
+ return a - (a % b);
+}
+
+uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t waves)
+{
+ /* it's not possible to allocate more than 128 SGPRs */
+ uint16_t sgprs = std::min(program->physical_sgprs / waves, 128);
+ sgprs = round_down(sgprs, program->sgpr_alloc_granule);
sgprs -= get_extra_sgprs(program);
return std::min(sgprs, program->sgpr_limit);
}
-uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves)
+uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t waves)
{
- uint16_t vgprs = program->physical_vgprs / max_waves & ~(program->vgpr_alloc_granule - 1);
+ uint16_t vgprs = program->physical_vgprs / waves & ~(program->vgpr_alloc_granule - 1);
return std::min(vgprs, program->vgpr_limit);
}
unsigned simd_per_cu_wgp = wgp ? simd_per_cu * 2 : simd_per_cu;
unsigned lds_limit = wgp ? program->lds_limit * 2 : program->lds_limit;
+ assert(program->min_waves >= 1);
+ uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
+ uint16_t vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
+
/* this won't compile, register pressure reduction necessary */
- if (new_demand.vgpr > program->vgpr_limit || new_demand.sgpr > program->sgpr_limit) {
+ if (new_demand.vgpr > vgpr_limit || new_demand.sgpr > sgpr_limit) {
program->num_waves = 0;
program->max_reg_demand = new_demand;
} else {
std::unordered_map<unsigned, Instruction*> vectors;
std::unordered_map<unsigned, Instruction*> split_vectors;
aco_ptr<Instruction> pseudo_dummy;
- unsigned max_used_sgpr = 0;
- unsigned max_used_vgpr = 0;
+ uint16_t max_used_sgpr = 0;
+ uint16_t max_used_vgpr = 0;
+ uint16_t sgpr_limit;
+ uint16_t vgpr_limit;
std::bitset<64> defs_done; /* see MAX_ARGS in aco_instruction_selection_setup.cpp */
ra_test_policy policy;
policy(policy_)
{
pseudo_dummy.reset(create_instruction<Instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 0, 0));
+ sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
+ vgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
}
};
void adjust_max_used_regs(ra_ctx& ctx, RegClass rc, unsigned reg)
{
- unsigned max_addressible_sgpr = ctx.program->sgpr_limit;
+ uint16_t max_addressible_sgpr = ctx.sgpr_limit;
unsigned size = rc.size();
if (rc.type() == RegType::vgpr) {
assert(reg >= 256);
- unsigned hi = reg - 256 + size - 1;
+ uint16_t hi = reg - 256 + size - 1;
ctx.max_used_vgpr = std::max(ctx.max_used_vgpr, hi);
} else if (reg + rc.size() <= max_addressible_sgpr) {
- unsigned hi = reg + size - 1;
+ uint16_t hi = reg + size - 1;
ctx.max_used_sgpr = std::max(ctx.max_used_sgpr, std::min(hi, max_addressible_sgpr));
}
}
}
bool increase_register_file(ra_ctx& ctx, RegType type) {
- uint16_t max_addressible_sgpr = ctx.program->sgpr_limit;
- uint16_t max_addressible_vgpr = ctx.program->vgpr_limit;
- if (type == RegType::vgpr && ctx.program->max_reg_demand.vgpr < max_addressible_vgpr) {
+ if (type == RegType::vgpr && ctx.program->max_reg_demand.vgpr < ctx.vgpr_limit) {
update_vgpr_sgpr_demand(ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr + 1, ctx.program->max_reg_demand.sgpr));
- } else if (type == RegType::sgpr && ctx.program->max_reg_demand.sgpr < max_addressible_sgpr) {
+ } else if (type == RegType::sgpr && ctx.program->max_reg_demand.sgpr < ctx.sgpr_limit) {
update_vgpr_sgpr_demand(ctx.program, RegisterDemand(ctx.program->max_reg_demand.vgpr, ctx.program->max_reg_demand.sgpr + 1));
} else {
return false;
}
/* num_gpr = rnd_up(max_used_gpr + 1) */
- program->config->num_vgprs = align(ctx.max_used_vgpr + 1, 4);
- if (program->family == CHIP_TONGA || program->family == CHIP_ICELAND) /* workaround hardware bug */
- program->config->num_sgprs = get_sgpr_alloc(program, program->sgpr_limit);
- else
- program->config->num_sgprs = align(ctx.max_used_sgpr + 1 + get_extra_sgprs(program), 8);
+ program->config->num_vgprs = get_vgpr_alloc(program, ctx.max_used_vgpr + 1);
+ program->config->num_sgprs = get_sgpr_alloc(program, ctx.max_used_sgpr + 1);
}
}
/* calculate target register demand */
RegisterDemand register_target = program->max_reg_demand;
- if (register_target.sgpr > program->sgpr_limit)
- register_target.vgpr += (register_target.sgpr - program->sgpr_limit + program->wave_size - 1 + 32) / program->wave_size;
- register_target.sgpr = program->sgpr_limit;
-
- if (register_target.vgpr > program->vgpr_limit)
- register_target.sgpr = program->sgpr_limit - 5;
+ uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->min_waves);
+ uint16_t vgpr_limit = get_addr_vgpr_from_waves(program, program->min_waves);
+ if (register_target.sgpr > sgpr_limit)
+ register_target.vgpr += (register_target.sgpr - sgpr_limit + program->wave_size - 1 + 32) / program->wave_size;
+ register_target.sgpr = sgpr_limit;
+
+ if (register_target.vgpr > vgpr_limit)
+ register_target.sgpr = sgpr_limit - 5;
int spills_to_vgpr = (program->max_reg_demand.sgpr - register_target.sgpr + program->wave_size - 1 + 32) / program->wave_size;
- register_target.vgpr = program->vgpr_limit - spills_to_vgpr;
+ register_target.vgpr = vgpr_limit - spills_to_vgpr;
/* initialize ctx */
spill_ctx ctx(register_target, program, live_vars.register_demand);
bool err = false;
aco::live live_vars = aco::live_var_analysis(program);
std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
+ uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);
std::map<unsigned, Assignment> assignments;
for (Block& block : program->blocks) {
if (assignments.count(op.tempId()) && assignments[op.tempId()].reg != op.physReg())
err |= ra_fail(program, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an inconsistent register assignment with instruction", i);
if ((op.getTemp().type() == RegType::vgpr && op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||
- (op.getTemp().type() == RegType::sgpr && op.physReg() + op.size() > program->config->num_sgprs && op.physReg() < program->sgpr_limit))
+ (op.getTemp().type() == RegType::sgpr && op.physReg() + op.size() > program->config->num_sgprs && op.physReg() < sgpr_limit))
err |= ra_fail(program, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an out-of-bounds register assignment", i);
if (op.physReg() == vcc && !program->needs_vcc)
err |= ra_fail(program, loc, Location(), "Operand %d fixed to vcc but needs_vcc=false", i);
if (assignments[def.tempId()].defloc.block)
err |= ra_fail(program, loc, assignments.at(def.tempId()).defloc, "Temporary %%%d also defined by instruction", def.tempId());
if ((def.getTemp().type() == RegType::vgpr && def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||
- (def.getTemp().type() == RegType::sgpr && def.physReg() + def.size() > program->config->num_sgprs && def.physReg() < program->sgpr_limit))
+ (def.getTemp().type() == RegType::sgpr && def.physReg() + def.size() > program->config->num_sgprs && def.physReg() < sgpr_limit))
err |= ra_fail(program, loc, assignments.at(def.tempId()).firstloc, "Definition %d has an out-of-bounds register assignment", i);
if (def.physReg() == vcc && !program->needs_vcc)
err |= ra_fail(program, loc, Location(), "Definition %d fixed to vcc but needs_vcc=false", i);
program.reset(new Program);
aco::init_program(program.get(), stage, &info, chip_class, family, &config);
+ program->workgroup_size = UINT_MAX;
+ calc_min_waves(program.get());
program->debug.func = nullptr;
program->debug.private_data = nullptr;