return sel & sdwa_asuint;
}
+unsigned get_mimg_nsa_dwords(const Instruction *instr) { /* Returns how many extra NSA dwords the MIMG instruction's address operands need, or 0 when every address register matches the position expected from operand 3. */
+ unsigned addr_dwords = instr->operands.size() - 3; /* address operands are the ones from index 3 onwards */
+ for (unsigned i = 1; i < addr_dwords; i++) {
+ if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4)) /* presumably advance(i * 4) is the i-th register after the first address - TODO confirm units */
+ return DIV_ROUND_UP(addr_dwords - 1, 4); /* the emitter packs one 8-bit register index per address after the first, four per dword */
+ }
+ return 0; /* no mismatch found: no NSA dwords are required */
+}
+
void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
{
/* lower remaining pseudo-instructions */
break;
}
case Format::MIMG: {
- unsigned use_nsa = false;
- unsigned addr_dwords = instr->operands.size() - 3;
- for (unsigned i = 1; i < addr_dwords; i++) {
- if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4))
- use_nsa = true;
- }
- assert(!use_nsa || ctx.chip_class >= GFX10);
- unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0;
+ unsigned nsa_dwords = get_mimg_nsa_dwords(instr);
+ assert(!nsa_dwords || ctx.chip_class >= GFX10);
MIMG_instruction& mimg = instr->mimg();
uint32_t encoding = (0b111100 << 26);
if (nsa_dwords) {
out.resize(out.size() + nsa_dwords);
std::vector<uint32_t>::iterator nsa = std::prev(out.end(), nsa_dwords);
- for (unsigned i = 0; i < addr_dwords - 1; i++)
+ for (unsigned i = 0; i < instr->operands.size() - 4u; i++)
nsa[i / 4] |= (0xFF & instr->operands[4 + i].physReg().reg()) << (i % 4 * 8);
}
break;
#include <algorithm>
#include "aco_ir.h"
+#include "aco_builder.h"
#include <stack>
#include <functional>
bool has_branch_after_VMEM = false;
bool has_DS = false;
bool has_branch_after_DS = false;
+ bool has_NSA_MIMG = false;
std::bitset<128> sgprs_read_by_VMEM;
std::bitset<128> sgprs_read_by_SMEM;
has_branch_after_VMEM |= other.has_branch_after_VMEM;
has_DS |= other.has_DS;
has_branch_after_DS |= other.has_branch_after_DS;
+ has_NSA_MIMG |= other.has_NSA_MIMG;
sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM;
}
has_branch_after_VMEM == other.has_branch_after_VMEM &&
has_DS == other.has_DS &&
has_branch_after_DS == other.has_branch_after_DS &&
+ has_NSA_MIMG == other.has_NSA_MIMG &&
sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
sgprs_read_by_SMEM == other.sgprs_read_by_SMEM;
}
wait->imm = 0;
new_instructions.emplace_back(std::move(wait));
}
+
+ /* NSAToVMEMBug
+ * Handles NSA MIMG (4 or more dwords) immediately followed by MUBUF/MTBUF (with offset[2:1] != 0).
+ */
+ if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 1) {
+ ctx.has_NSA_MIMG = true;
+ } else if (ctx.has_NSA_MIMG) {
+ ctx.has_NSA_MIMG = false;
+
+ if (instr->isMUBUF() || instr->isMTBUF()) {
+ uint32_t offset = instr->isMUBUF() ? instr->mubuf().offset : instr->mtbuf().offset;
+ if (offset & 6)
+ Builder(program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
+ }
+ }
}
template <typename Ctx>
uint32_t get_reduction_identity(ReduceOp op, unsigned idx);
+unsigned get_mimg_nsa_dwords(const Instruction *instr);
+
enum block_kind {
/* uniform indicates that leaving this block,
* all actives lanes stay active */