bool Block::try_reserve_kcache(const AluGroup& group)
{
+ auto kcache = m_kcache;
+
auto kcache_constants = group.get_kconsts();
for (auto& kc : kcache_constants) {
auto u = kc->as_uniform();
assert(u);
- if (!try_reserve_kcache(*u))
+ if (!try_reserve_kcache(*u, kcache)) {
+ m_kcache_alloc_failed = true;
return false;
+ }
+ }
+
+ m_kcache = kcache;
+ m_kcache_alloc_failed = false;
+ return true;
+}
+
+bool Block::try_reserve_kcache(const AluInstr& instr)
+{
+ auto kcache = m_kcache;
+
+ for (auto& src : instr.sources()) {
+ auto u = src->as_uniform();
+ if (u) {
+ if (!try_reserve_kcache(*u, kcache)) {
+ m_kcache_alloc_failed = true;
+ return false;
+ }
+ }
}
+ m_kcache = kcache;
+ m_kcache_alloc_failed = false;
return true;
}
-bool Block::try_reserve_kcache(const UniformValue& u)
+bool Block::try_reserve_kcache(const UniformValue& u,
+ std::array<KCacheLine, 4>& kcache) const
{
const int kcache_banks = 4; // TODO: handle pre-evergreen
bool found = false;
for (int i = 0; i < kcache_banks && !found; ++i) {
- if (m_kcache[i].mode) {
- if (m_kcache[i].bank < bank)
+ if (kcache[i].mode) {
+ if (kcache[i].bank < bank)
continue;
- if ((m_kcache[i].bank == bank &&
- m_kcache[i].addr > line + 1) ||
- m_kcache[i].bank > bank) {
- if (m_kcache[kcache_banks - 1].mode)
+ if ((kcache[i].bank == bank &&
+ kcache[i].addr > line + 1) ||
+ kcache[i].bank > bank) {
+ if (kcache[kcache_banks - 1].mode)
return false;
- memmove(&m_kcache[i+1],&m_kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
- m_kcache[i].mode = KCacheLine::lock_1;
- m_kcache[i].bank = bank;
- m_kcache[i].addr = line;
+ memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(KCacheLine));
+ kcache[i].mode = KCacheLine::lock_1;
+ kcache[i].bank = bank;
+ kcache[i].addr = line;
return true;
}
- int d = line - m_kcache[i].addr;
+ int d = line - kcache[i].addr;
if (d == -1) {
- m_kcache[i].addr--;
- if (m_kcache[i].mode == KCacheLine::lock_2) {
+ kcache[i].addr--;
+ if (kcache[i].mode == KCacheLine::lock_2) {
/* we are prepending the line to the current set,
- * discarding the existing second line,
- * so we'll have to insert line+2 after it */
+ * discarding the existing second line,
+ * so we'll have to insert line+2 after it */
line += 2;
continue;
- } else if (m_kcache[i].mode == KCacheLine::lock_1) {
- m_kcache[i].mode = KCacheLine::lock_2;
+ } else if (kcache[i].mode == KCacheLine::lock_1) {
+ kcache[i].mode = KCacheLine::lock_2;
return true;
} else {
/* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
return false;
}
} else if (d == 1) {
- m_kcache[i].mode = KCacheLine::lock_2;
+ kcache[i].mode = KCacheLine::lock_2;
return true;
- } else if (d == 0)
+ } else if (d == 0) {
return true;
+ }
} else { /* free kcache set - use it */
- m_kcache[i].mode = KCacheLine::lock_1;
- m_kcache[i].bank = bank;
- m_kcache[i].addr = line;
+ kcache[i].mode = KCacheLine::lock_1;
+ kcache[i].bank = bank;
+ kcache[i].addr = line;
return true;
}
}
void set_type(Type t);
uint32_t remaining_slots() const { return m_remaining_slots;}
- bool try_reserve_kcache(const AluGroup& group);
+ bool try_reserve_kcache(const AluGroup& group);
+ bool try_reserve_kcache(const AluInstr& instr);
auto last_lds_instr() {return m_last_lds_instr;}
void set_last_lds_instr(Instr *instr) {m_last_lds_instr = instr;}
size_t size() const { return m_instructions.size();}
+ bool kcache_reservation_failed() const { return m_kcache_alloc_failed;}
+
private:
- bool try_reserve_kcache(const UniformValue& u);
+ bool try_reserve_kcache(const UniformValue& u,
+ std::array<KCacheLine, 4>& kcache) const;
bool do_ready() const override {return true;};
void do_print(std::ostream& os) const override;
uint32_t m_remaining_slots{0xffff};
std::array<KCacheLine, 4> m_kcache;
+ bool m_kcache_alloc_failed{false};
Instr *m_last_lds_instr{nullptr};
int m_lds_group_requirement{0};
AluInstr *m_lds_group_start{nullptr};
+
};
class InstrWithVectorResult : public Instr {
bool has_lds_ready = !alu_vec_ready.empty() &&
(*alu_vec_ready.begin())->has_lds_access();
+ /* If we have ready ALU instructions we have to start a new ALU block */
+ if (has_alu_ready || !alu_groups_ready.empty()) {
+ if (m_current_block->type() != Block::alu) {
+ start_new_block(out_blocks, Block::alu);
+ m_alu_groups_schduled = 0;
+ }
+ }
+
/* Schedule groups first. unless we have a pending LDS instuction
* We don't want the LDS instructions to be too far apart because the
* fetch + read from queue has to be in the same ALU CF block */
if (!alu_groups_ready.empty() && !has_lds_ready) {
group = *alu_groups_ready.begin();
+ if (!m_current_block->try_reserve_kcache(*group)) {
+ start_new_block(out_blocks, Block::alu);
+ m_current_block->set_instr_flag(Instr::force_cf);
+ }
+
+ if (!m_current_block->try_reserve_kcache(*group))
+ unreachable("Scheduling a group in a new block should always succeed");
alu_groups_ready.erase(alu_groups_ready.begin());
sfn_log << SfnLog::schedule << "Schedule ALU group\n";
success = true;
+ } else if (has_alu_ready) {
+ group = new AluGroup();
+ sfn_log << SfnLog::schedule << "START new ALU group\n";
} else {
- if (has_alu_ready) {
- group = new AluGroup();
- sfn_log << SfnLog::schedule << "START new ALU group\n";
- }
+ return false;
}
- if (group) {
- int free_slots = group->free_slots();
+ assert(group);
- if (free_slots && has_alu_ready) {
- if (!alu_vec_ready.empty())
- success |= schedule_alu_to_group_vec(group);
-
- /* Apparently one can't schedule a t-slot if there is already
- * and LDS instruction scheduled.
- * TODO: check whether this is only relevant for actual LDS instructions
- * or also for instructions that read from the LDS return value queue */
-
- if (free_slots & 0x10 && !has_lds_ready) {
- sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
- if (!alu_trans_ready.empty())
- success |= schedule_alu_to_group_trans(group, alu_trans_ready);
- if (!alu_vec_ready.empty())
- success |= schedule_alu_to_group_trans(group, alu_vec_ready);
- }
- }
+ int free_slots = group->free_slots();
- sfn_log << SfnLog::schedule << "Finalize ALU group\n";
- group->set_scheduled();
- group->fix_last_flag();
- group->set_nesting_depth(m_current_block->nesting_depth());
+ while (free_slots && has_alu_ready) {
+ if (!alu_vec_ready.empty())
+ success |= schedule_alu_to_group_vec(group);
+ /* Apparently one can't schedule a t-slot if there is already
+ * an LDS instruction scheduled.
+ * TODO: check whether this is only relevant for actual LDS instructions
+ * or also for instructions that read from the LDS return value queue */
- if (m_current_block->type() != Block::alu) {
- start_new_block(out_blocks, Block::alu);
- m_alu_groups_schduled = 0;
+ if (free_slots & 0x10 && !has_lds_ready) {
+ sfn_log << SfnLog::schedule << "Try schedule TRANS channel\n";
+ if (!alu_trans_ready.empty())
+ success |= schedule_alu_to_group_trans(group, alu_trans_ready);
+ if (!alu_vec_ready.empty())
+ success |= schedule_alu_to_group_trans(group, alu_vec_ready);
}
- /* Pessimistic hack: If we have started an LDS group,
- * make sure 8 instructions groups still fit into the CF
- * TODO: take care of Address slot emission
- * TODO: maybe do this CF split only in the assembler
- */
- /*if (group->slots() > m_current_block->remaining_slots() ||
- (group->has_lds_group_start() &&
- m_current_block->remaining_slots() < 7 * 8)) {
- //assert(!m_current_block->lds_group_active());
- start_new_block(out_blocks, Block::alu);
- }*/
-
- if (!m_current_block->try_reserve_kcache(*group)) {
+ if (success) {
+ ++m_alu_groups_schduled;
+ break;
+ } else if (m_current_block->kcache_reservation_failed()) {
+ // LDS read groups should not lead to impossible
+ // kcache constellations
assert(!m_current_block->lds_group_active());
+
+ // kcache reservation failed, so we have to start a new CF
start_new_block(out_blocks, Block::alu);
m_current_block->set_instr_flag(Instr::force_cf);
+ } else {
+ return false;
}
+ }
- assert(m_current_block->try_reserve_kcache(*group));
-
- if (group->has_lds_group_start())
- m_current_block->lds_group_start(*group->begin());
+ sfn_log << SfnLog::schedule << "Finalize ALU group\n";
+ group->set_scheduled();
+ group->fix_last_flag();
+ group->set_nesting_depth(m_current_block->nesting_depth());
+ m_current_block->push_back(group);
- m_current_block->push_back(group);
- if (group->has_lds_group_end())
- m_current_block->lds_group_end();
- }
+ if (group->has_lds_group_start())
+ m_current_block->lds_group_start(*group->begin());
- if (success)
- ++m_alu_groups_schduled;
+ if (group->has_lds_group_end())
+ m_current_block->lds_group_end();
return success;
}
auto e = alu_vec_ready.end();
while (i != e) {
sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;
+
+ if (!m_current_block->try_reserve_kcache(**i)) {
+ sfn_log << SfnLog::schedule << " failed (kcache)\n";
+ ++i;
+ continue;
+ }
+
if (group->add_vec_instructions(*i)) {
auto old_i = i;
++i;
auto e = readylist.end();
while (i != e) {
sfn_log << SfnLog::schedule << "Try schedule to trans " << **i;
+ if (!m_current_block->try_reserve_kcache(**i)) {
+ sfn_log << SfnLog::schedule << " failed (kcache)\n";
+ ++i;
+ continue;
+ }
+
if (group->add_trans_instructions(*i)) {
auto old_i = i;
++i;