void
kill(wait_imm& imm, Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
{
- if (debug_flags & DEBUG_FORCE_WAITCNT) {
+ if (instr->opcode == aco_opcode::s_setpc_b64 || (debug_flags & DEBUG_FORCE_WAITCNT)) {
/* Force emitting waitcnt states right after the instruction if there is
- * something to wait for.
+ * something to wait for. The same is done for s_setpc_b64, to ensure
+ * that waitcnt states are inserted before jumping to the PS epilog.
*/
force_waitcnt(ctx, imm);
}
/* don't move non-reorderable instructions */
if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime ||
instr->opcode == aco_opcode::s_setprio || instr->opcode == aco_opcode::s_getreg_b32 ||
- instr->opcode == aco_opcode::p_init_scratch)
+ instr->opcode == aco_opcode::p_init_scratch || instr->opcode == aco_opcode::p_jump_to_epilog)
return hazard_fail_unreorderable;
memory_event_set instr_set;
bool flat = instr->isFlatLike();
bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
instr->opcode == aco_opcode::p_create_vector ||
+ instr->opcode == aco_opcode::p_jump_to_epilog ||
(flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
(instr->isScratch() && i == 0);
unsigned comp = data_bits / MAX2(op_bits, 1);
check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
instr.get());
+ } else if (instr->opcode == aco_opcode::p_jump_to_epilog) {
+ check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions",
+ instr.get());
+ check(instr->operands.size() > 0 &&
+ instr->operands[0].getTemp().type() == RegType::sgpr &&
+ instr->operands[0].getTemp().size() == 2,
+ "First operand of p_jump_to_epilog must be a SGPR", instr.get());
+ for (unsigned i = 1; i < instr->operands.size(); i++) {
+ check(instr->operands[i].getTemp().type() == RegType::vgpr ||
+ instr->operands[i].isUndefined(),
+ "Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get());
+ }
}
break;
}