if (ctx.chip_class <= GFX9) {
assert(flat->offset <= 0x1fff);
encoding |= flat->offset & 0x1fff;
- } else if (instr->format == Format::FLAT) {
+ } else if (instr->isFlat()) {
/* GFX10 has a 12-bit immediate OFFSET field,
* but it has a hw bug: it ignores the offset, called FlatSegmentOffsetBug
*/
assert(flat->offset <= 0xfff);
encoding |= flat->offset & 0xfff;
}
- if (instr->format == Format::SCRATCH)
+ if (instr->isScratch())
encoding |= 1 << 14;
- else if (instr->format == Format::GLOBAL)
+ else if (instr->isGlobal())
encoding |= 2 << 14;
encoding |= flat->lds ? 1 << 13 : 0;
encoding |= flat->glc ? 1 << 16 : 0;
unreachable("Pseudo instructions should be lowered before assembly.");
break;
default:
- if ((uint16_t) instr->format & (uint16_t) Format::VOP3) {
+ if (instr->isVOP3()) {
VOP3_instruction* vop3 = instr->vop3();
- if ((uint16_t) instr->format & (uint16_t) Format::VOP2) {
+ if (instr->isVOP2()) {
opcode = opcode + 0x100;
- } else if ((uint16_t) instr->format & (uint16_t) Format::VOP1) {
+ } else if (instr->isVOP1()) {
if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9)
opcode = opcode + 0x140;
else
opcode = opcode + 0x180;
- } else if ((uint16_t) instr->format & (uint16_t) Format::VOPC) {
+ } else if (instr->isVOPC()) {
opcode = opcode + 0x0;
- } else if ((uint16_t) instr->format & (uint16_t) Format::VINTRP) {
+ } else if (instr->isVINTRP()) {
opcode = opcode + 0x270;
}
encoding |= vop3->neg[i] << (29+i);
out.push_back(encoding);
- } else if (instr->format == Format::VOP3P) {
+ } else if (instr->isVOP3P()) {
VOP3P_instruction* vop3 = instr->vop3p();
uint32_t encoding;
uint32_t encoding = 0;
- if ((uint16_t)instr->format & (uint16_t)Format::VOPC) {
+ if (instr->isVOPC()) {
if (instr->definitions[0].physReg() != vcc) {
encoding |= instr->definitions[0].physReg() << 8;
encoding |= 1 << 15;
std::vector<aco_ptr<Instruction>>::reverse_iterator it = block.instructions.rbegin();
while ( it != block.instructions.rend())
{
- if ((*it)->format == Format::EXP) {
+ if ((*it)->isEXP()) {
Export_instruction* exp = (*it)->exp();
if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) {
if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) {
bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr)
{
- if (instr->definitions.empty() || instr->format == Format::PSEUDO_BRANCH)
+ if (instr->definitions.empty() || instr->isBranch())
return false;
if (std::any_of(instr->definitions.begin(), instr->definitions.end(),
[&uses] (const Definition& def) { return uses[def.tempId()];}))
if (instr->isVMEM() && !instr->operands.empty()) {
resource = instr->operands[0].tempId();
type = clause_vmem;
- } else if (instr->format == Format::SCRATCH || instr->format == Format::GLOBAL) {
+ } else if (instr->isScratch() || instr->isGlobal()) {
type = clause_vmem;
- } else if (instr->format == Format::FLAT) {
+ } else if (instr->isFlat()) {
type = clause_flat;
- } else if (instr->format == Format::SMEM && !instr->operands.empty()) {
+ } else if (instr->isSMEM() && !instr->operands.empty()) {
type = clause_smem;
if (instr->operands[0].bytes() == 16)
resource = instr->operands[0].tempId();
bool is_hazard = writemask != 0 &&
((pred->isVALU() && Valu) ||
- (pred->format == Format::VINTRP && Vintrp) ||
+ (pred->isVINTRP() && Vintrp) ||
(pred->isSALU() && Salu));
if (is_hazard)
return nops_needed;
/* check hazards */
int NOPs = 0;
- if (instr->format == Format::SMEM) {
+ if (instr->isSMEM()) {
if (program->chip_class == GFX6) {
/* A read of an SGPR by SMRD instruction requires 4 wait states
* when the SGPR was written by a VALU instruction. According to LLVM,
if (instr->opcode == aco_opcode::s_sendmsg || instr->opcode == aco_opcode::s_ttracedata)
NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace);
- } else if (instr->format == Format::DS && instr->ds()->gds) {
+ } else if (instr->isDS() && instr->ds()->gds) {
NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace);
- } else if (instr->isVALU() || instr->format == Format::VINTRP) {
+ } else if (instr->isVALU() || instr->isVINTRP()) {
for (Operand op : instr->operands) {
if (op.physReg() == vccz)
NOPs = MAX2(NOPs, ctx.valu_wr_vcc_then_vccz);
if (instr->opcode == aco_opcode::v_div_fmas_f32 || instr->opcode == aco_opcode::v_div_fmas_f64)
NOPs = MAX2(NOPs, ctx.valu_wr_vcc_then_div_fmas);
- } else if (instr->isVMEM() || instr->isFlatOrGlobal() || instr->format == Format::SCRATCH) {
+ } else if (instr->isVMEM() || instr->isFlatLike()) {
/* If the VALU writes the SGPR that is used by a VMEM, the user must add five wait states. */
for (Operand op : instr->operands) {
if (!op.isConstant() && !op.isUndefined() && op.regClass().type() == RegType::sgpr)
NOPs = MAX2(NOPs, ctx.set_vskip_mode_then_vector);
if (program->chip_class == GFX9) {
- bool lds_scratch_global = (instr->format == Format::SCRATCH || instr->format == Format::GLOBAL) &&
+ bool lds_scratch_global = (instr->isScratch() || instr->isGlobal()) &&
instr->flatlike()->lds;
- if (instr->format == Format::VINTRP ||
+ if (instr->isVINTRP() ||
instr->opcode == aco_opcode::ds_read_addtid_b32 ||
instr->opcode == aco_opcode::ds_write_addtid_b32 ||
instr->opcode == aco_opcode::buffer_store_lds_dword ||
}
}
- if (instr->format == Format::SMEM) {
+ if (instr->isSMEM()) {
if (instr->definitions.empty() || instr_info.is_atomic[(unsigned)instr->opcode]) {
ctx.smem_write = true;
} else {
if (reg == 1 && offset >= 28 && size > (28 - offset))
ctx.set_vskip_mode_then_vector = 2;
}
- } else if (instr->isVMEM() || instr->isFlatOrGlobal() || instr->format == Format::SCRATCH) {
+ } else if (instr->isVMEM() || instr->isFlatLike()) {
/* >64-bit MUBUF/MTBUF store with a constant in SOFFSET */
- bool consider_buf = (instr->format == Format::MUBUF || instr->format == Format::MTBUF) &&
+ bool consider_buf = (instr->isMUBUF() || instr->isMTBUF()) &&
instr->operands.size() == 4 &&
instr->operands[3].size() > 2 &&
instr->operands[2].physReg() >= 128;
/* MIMG store with a 128-bit T# with more than two bits set in dmask (making it a >64-bit store) */
- bool consider_mimg = instr->format == Format::MIMG &&
+ bool consider_mimg = instr->isMIMG() &&
instr->operands[1].regClass().type() == RegType::vgpr &&
instr->operands[1].size() > 2 &&
instr->operands[0].size() == 4;
/* FLAT/GLOBAL/SCRATCH store with >64-bit data */
- bool consider_flat = (instr->isFlatOrGlobal() || instr->format == Format::SCRATCH) &&
- instr->operands.size() == 3 &&
- instr->operands[2].size() > 2;
+ bool consider_flat = instr->isFlatLike() &&
+ instr->operands.size() == 3 &&
+ instr->operands[2].size() > 2;
if (consider_buf || consider_mimg || consider_flat) {
PhysReg wrdata = instr->operands[consider_flat ? 2 : 3].physReg();
unsigned size = instr->operands[consider_flat ? 2 : 3].size();
bool VALU_writes_sgpr(aco_ptr<Instruction>& instr)
{
- if ((uint32_t) instr->format & (uint32_t) Format::VOPC)
+ if (instr->isVOPC())
return true;
if (instr->isVOP3() && instr->definitions.size() == 2)
return true;
/* VMEMtoScalarWriteHazard
* Handle EXEC/M0/SGPR write following a VMEM instruction without a VALU or "waitcnt vmcnt(0)" in-between.
*/
- if (instr->isVMEM() || instr->format == Format::FLAT || instr->format == Format::GLOBAL ||
- instr->format == Format::SCRATCH || instr->format == Format::DS) {
+ if (instr->isVMEM() || instr->isFlatLike() || instr->isDS()) {
/* Remember all SGPRs that are read by the VMEM instruction */
mark_read_regs(instr, ctx.sgprs_read_by_VMEM);
ctx.sgprs_read_by_VMEM.set(exec);
if (program->wave_size == 64)
ctx.sgprs_read_by_VMEM.set(exec_hi);
- } else if (instr->isSALU() || instr->format == Format::SMEM) {
+ } else if (instr->isSALU() || instr->isSMEM()) {
if (instr->opcode == aco_opcode::s_waitcnt) {
/* Hazard is mitigated by "s_waitcnt vmcnt(0)" */
uint16_t imm = instr->sopp()->imm;
/* VcmpxPermlaneHazard
* Handle any permlane following a VOPC instruction, insert v_mov between them.
*/
- if (instr->format == Format::VOPC) {
+ if (instr->isVOPC()) {
ctx.has_VOPC = true;
} else if (ctx.has_VOPC &&
(instr->opcode == aco_opcode::v_permlane16_b32 ||
/* SMEMtoVectorWriteHazard
* Handle any VALU instruction writing an SGPR after an SMEM reads it.
*/
- if (instr->format == Format::SMEM) {
+ if (instr->isSMEM()) {
/* Remember all SGPRs that are read by the SMEM instruction */
mark_read_regs(instr, ctx.sgprs_read_by_SMEM);
} else if (VALU_writes_sgpr(instr)) {
/* LdsBranchVmemWARHazard
* Handle VMEM/GLOBAL/SCRATCH->branch->DS and DS->branch->VMEM/GLOBAL/SCRATCH patterns.
*/
- if (instr->isVMEM() || instr->format == Format::GLOBAL || instr->format == Format::SCRATCH) {
+ if (instr->isVMEM() || instr->isGlobal() || instr->isScratch()) {
ctx.has_VMEM = true;
ctx.has_branch_after_VMEM = false;
/* Mitigation for DS is needed only if there was already a branch after */
ctx.has_DS = ctx.has_branch_after_DS;
- } else if (instr->format == Format::DS) {
+ } else if (instr->isDS()) {
ctx.has_DS = true;
ctx.has_branch_after_DS = false;
/* Mitigation for VMEM is needed only if there was already a branch after */
};
/* Returns true when the instruction must be executed with the exact exec mask
 * rather than WQM: buffer/image/flat-like memory instructions that have their
 * disable_wqm flag set, and all export (EXP) instructions. */
bool needs_exact(aco_ptr<Instruction>& instr) {
   if (instr->isMUBUF()) {
      return instr->mubuf()->disable_wqm;
   } else if (instr->isMTBUF()) {
      return instr->mtbuf()->disable_wqm;
   } else if (instr->isMIMG()) {
      return instr->mimg()->disable_wqm;
   } else if (instr->isFlatLike()) {
      /* isFlatLike() covers FLAT/GLOBAL/SCRATCH, which share the
       * flatlike() accessor. */
      return instr->flatlike()->disable_wqm;
   } else {
      return instr->isEXP();
   }
}
}
}
- if (instr->format == Format::PSEUDO_BRANCH && ctx.branch_wqm[block->index]) {
+ if (instr->isBranch() && ctx.branch_wqm[block->index]) {
needs = WQM;
propagate_wqm = true;
}
if (block->kind & block_kind_discard) {
- assert(block->instructions.back()->format == Format::PSEUDO_BRANCH);
+ assert(block->instructions.back()->isBranch());
aco_ptr<Instruction> branch = std::move(block->instructions.back());
block->instructions.pop_back();
continue;
/* Vector Memory reads and writes return in the order they were issued */
- bool has_sampler = instr->format == Format::MIMG && !instr->operands[1].isUndefined() && instr->operands[1].regClass() == s4;
+ bool has_sampler = instr->isMIMG() && !instr->operands[1].isUndefined() && instr->operands[1].regClass() == s4;
if (instr->isVMEM() && ((it->second.events & vm_events) == event_vmem) &&
it->second.has_vmem_nosampler == !has_sampler && it->second.has_vmem_sampler == has_sampler)
continue;
/* LDS reads and writes return in the order they were issued. same for GDS */
- if (instr->format == Format::DS) {
- if ((it->second.events & lgkm_events) == (instr->ds()->gds ? event_gds : event_lds))
- continue;
- }
+ if (instr->isDS() && (it->second.events & lgkm_events) == (instr->ds()->gds ? event_gds : event_lds))
+ continue;
wait.combine(it->second.imm);
}
imm.lgkm = 0;
}
- if (ctx.chip_class >= GFX10 && instr->format == Format::SMEM) {
+ if (ctx.chip_class >= GFX10 && instr->isSMEM()) {
/* GFX10: A store followed by a load at the same address causes a problem because
* the load doesn't load the correct values unless we wait for the store first.
* This is NOT mitigated by an s_nop.
wait_event ev = !instr->definitions.empty() || ctx.chip_class < GFX10 ? event_vmem : event_vmem_store;
update_counters(ctx, ev, get_sync_info(instr));
- bool has_sampler = instr->format == Format::MIMG && !instr->operands[1].isUndefined() && instr->operands[1].regClass() == s4;
+ bool has_sampler = instr->isMIMG() && !instr->operands[1].isUndefined() && instr->operands[1].regClass() == s4;
if (!instr->definitions.empty())
insert_wait_entry(ctx, instr->definitions[0], ev, has_sampler);
update_counters(ctx, event_vmem_gpr_lock);
insert_wait_entry(ctx, instr->operands[3], event_vmem_gpr_lock);
} else if (ctx.chip_class == GFX6 &&
- instr->format == Format::MIMG &&
+ instr->isMIMG() &&
!instr->operands[2].isUndefined()) {
ctx.exp_cnt++;
update_counters(ctx, event_vmem_gpr_lock);
return false;
//TODO: return true if we know we will use vcc
- if ((unsigned)instr->format & (unsigned)Format::VOPC)
+ if (instr->isVOPC())
return false;
if (instr->operands.size() >= 3 && !is_mac)
return false;
bool needs_exec_mask(const Instruction* instr) {
if (instr->isSALU())
return instr->reads_exec();
- if (instr->format == Format::SMEM || instr->isSALU())
+ if (instr->isSMEM() || instr->isSALU())
return false;
- if (instr->format == Format::PSEUDO_BARRIER)
+ if (instr->isBarrier())
return false;
- if (instr->format == Format::PSEUDO) {
+ if (instr->isPseudo()) {
switch (instr->opcode) {
case aco_opcode::p_create_vector:
case aco_opcode::p_extract_vector:
constexpr bool isVALU() const noexcept
{
- return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
- || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
- || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
- || ((uint16_t) format & (uint16_t) Format::VOP3) == (uint16_t) Format::VOP3
- || format == Format::VOP3P;
+ return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P();
}
constexpr bool isSALU() const noexcept
{
- return format == Format::SOP1 ||
- format == Format::SOP2 ||
- format == Format::SOPC ||
- format == Format::SOPK ||
- format == Format::SOPP;
+ return isSOP1() || isSOP2() || isSOPC() || isSOPK() || isSOPP();
}
constexpr bool isVMEM() const noexcept
{
- return format == Format::MTBUF ||
- format == Format::MUBUF ||
- format == Format::MIMG;
- }
-
- constexpr bool isFlatOrGlobal() const noexcept
- {
- return format == Format::FLAT || format == Format::GLOBAL;
+ return isMTBUF() || isMUBUF() || isMIMG();
}
};
static_assert(sizeof(Instruction) == 16, "Unexpected padding");
if (isDPP() || isSDWA())
return true;
- if (format == Format::VOP3P) {
+ if (isVOP3P()) {
const VOP3P_instruction *vop3p = this->vop3p();
for (unsigned i = 0; i < operands.size(); i++) {
if (vop3p->neg_lo[i] || vop3p->neg_hi[i])
auto it = std::find_if(block->instructions.crbegin(), block->instructions.crend(), IsLogicalEnd);
if (it == block->instructions.crend()) {
- assert(block->instructions.back()->format == Format::PSEUDO_BRANCH);
+ assert(block->instructions.back()->isBranch());
block->instructions.insert(std::prev(block->instructions.end()), std::move(instr));
} else {
block->instructions.insert(std::prev(it.base()), std::move(instr));
Block& block = ctx.program->blocks[entry.first];
std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.end();
--it;
- assert((*it)->format == Format::PSEUDO_BRANCH);
+ assert((*it)->isBranch());
Builder bld(ctx.program);
bld.reset(&block.instructions, it);
for (size_t instr_idx = 0; instr_idx < block->instructions.size(); instr_idx++) {
aco_ptr<Instruction>& instr = block->instructions[instr_idx];
aco_ptr<Instruction> mov;
- if (instr->format == Format::PSEUDO && instr->opcode != aco_opcode::p_unit_test) {
+ if (instr->isPseudo() && instr->opcode != aco_opcode::p_unit_test) {
Pseudo_instruction *pi = instr->pseudo();
switch (instr->opcode)
default:
break;
}
- } else if (instr->format == Format::PSEUDO_BRANCH) {
+ } else if (instr->isBranch()) {
Pseudo_branch_instruction* branch = instr->branch();
uint32_t target = branch->target[0];
}
for (aco_ptr<Instruction>& inst : program->blocks[i].instructions) {
- if (inst->format == Format::SOPP) {
+ if (inst->isSOPP()) {
can_remove = false;
} else if (inst->isSALU()) {
num_scalar++;
unreachable("Unknown Pseudo branch instruction!");
}
- } else if (instr->format == Format::PSEUDO_REDUCTION) {
+ } else if (instr->isReduction()) {
Pseudo_reduction_instruction* reduce = instr->reduction();
emit_reduction(&ctx, reduce->opcode, reduce->reduce_op, reduce->cluster_size,
reduce->operands[1].physReg(), // tmp
reduce->operands[2].physReg(), // vtmp
reduce->definitions[2].physReg(), // sitmp
reduce->operands[0], reduce->definitions[0]);
- } else if (instr->format == Format::PSEUDO_BARRIER) {
+ } else if (instr->isBarrier()) {
Pseudo_barrier_instruction* barrier = instr->barrier();
/* Anything larger than a workgroup isn't possible. Anything
return a->pass_flags == b->pass_flags;
/* The results of VOPC depend on the exec mask if used for subgroup operations. */
- if ((uint32_t) a->format & (uint32_t) Format::VOPC && a->pass_flags != b->pass_flags)
+ if (a->isVOPC() && a->pass_flags != b->pass_flags)
return false;
if (a->isVOP3()) {
if (instr->isVOP3())
return true;
- if (instr->format == Format::VOP3P)
+ if (instr->isVOP3P())
return false;
if (instr->operands.size() && instr->operands[0].isLiteral() && ctx.program->chip_class < GFX10)
unsigned get_operand_size(aco_ptr<Instruction>& instr, unsigned index)
{
- if (instr->format == Format::PSEUDO)
+ if (instr->isPseudo())
return instr->operands[index].bytes() * 8u;
else if (instr->opcode == aco_opcode::v_mad_u64_u32 || instr->opcode == aco_opcode::v_mad_i64_i32)
return index == 2 ? 64 : 32;
void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
{
- if (instr->isSALU() || instr->isVALU() || instr->format == Format::PSEUDO) {
+ if (instr->isSALU() || instr->isVALU() || instr->isPseudo()) {
ASSERTED bool all_const = false;
for (Operand& op : instr->operands)
all_const = all_const && (!op.isTemp() || ctx.info[op.tempId()].is_constant_or_literal(32));
}
/* PSEUDO: propagate temporaries */
- if (instr->format == Format::PSEUDO) {
+ if (instr->isPseudo()) {
while (info.is_temp()) {
pseudo_propagate_temp(ctx, instr, info.temp, i);
info = ctx.info[info.temp.id()];
}
/* SALU / PSEUDO: propagate inline constants */
- if (instr->isSALU() || instr->format == Format::PSEUDO) {
+ if (instr->isSALU() || instr->isPseudo()) {
unsigned bits = get_operand_size(instr, i);
- if ((info.is_constant(bits) || (info.is_literal(bits) && instr->format == Format::PSEUDO)) &&
+ if ((info.is_constant(bits) || (info.is_literal(bits) && instr->isPseudo())) &&
!instr->operands[i].isFixed() && alu_can_accept_constant(instr->opcode, i)) {
instr->operands[i] = get_constant_op(ctx, info, bits);
continue;
(!instr->isSDWA() || ctx.program->chip_class >= GFX9)) {
Operand op = get_constant_op(ctx, info, bits);
perfwarn(ctx.program, instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, "v_cndmask_b32 with a constant selector", instr.get());
- if (i == 0 || instr->isSDWA() || instr->format == Format::VOP3P ||
+ if (i == 0 || instr->isSDWA() || instr->isVOP3P() ||
instr->opcode == aco_opcode::v_readlane_b32 ||
instr->opcode == aco_opcode::v_writelane_b32) {
instr->operands[i] = op;
}
/* MUBUF: propagate constants and combine additions */
- else if (instr->format == Format::MUBUF) {
+ else if (instr->isMUBUF()) {
MUBUF_instruction *mubuf = instr->mubuf();
Temp base;
uint32_t offset;
}
/* DS: combine additions */
- else if (instr->format == Format::DS) {
+ else if (instr->isDS()) {
DS_instruction *ds = instr->ds();
Temp base;
}
/* SMEM: propagate constants and combine additions */
- else if (instr->format == Format::SMEM) {
+ else if (instr->isSMEM()) {
SMEM_instruction *smem = instr->smem();
Temp base;
}
}
- else if (instr->format == Format::PSEUDO_BRANCH) {
+ else if (instr->isBranch()) {
if (ctx.info[instr->operands[0].tempId()].is_scc_invert()) {
/* Flip the branch instruction to get rid of the scc_invert instruction */
instr->opcode = instr->opcode == aco_opcode::p_cbranch_z ? aco_opcode::p_cbranch_nz : aco_opcode::p_cbranch_z;
if (instr->definitions.empty())
return;
- if ((uint16_t) instr->format & (uint16_t) Format::VOPC) {
+ if (instr->isVOPC()) {
ctx.info[instr->definitions[0].tempId()].set_vopc(instr.get());
return;
}
- if (instr->format == Format::VOP3P) {
+ if (instr->isVOP3P()) {
ctx.info[instr->definitions[0].tempId()].set_vop3p(instr.get());
return;
}
continue;
if (sgpr_idx == 0 || instr->isVOP3() ||
- instr->isSDWA() || instr->format == Format::VOP3P) {
+ instr->isSDWA() || instr->isVOP3P()) {
instr->operands[sgpr_idx] = Operand(sgpr);
} else if (can_swap_operands(instr)) {
instr->operands[sgpr_idx] = instr->operands[0];
}
/* turn packed mul+add into v_pk_fma_f16 */
- assert(mul_instr->format == Format::VOP3P);
+ assert(mul_instr->isVOP3P());
aco_ptr<VOP3P_instruction> fma{create_instruction<VOP3P_instruction>(aco_opcode::v_pk_fma_f16, Format::VOP3P, 3, 1)};
VOP3P_instruction* mul = mul_instr->vop3p();
for (unsigned i = 0; i < 2; i++) {
while (apply_omod_clamp(ctx, block, instr)) ;
}
- if (instr->format == Format::VOP3P)
+ if (instr->isVOP3P())
return combine_vop3p(ctx, block, instr);
if (ctx.info[instr->definitions[0].tempId()].is_vcc_hint()) {
}
/* Mark SCC needed, so the uniform boolean transformation won't swap the definitions when it isn't beneficial */
- if (instr->format == Format::PSEUDO_BRANCH &&
+ if (instr->isBranch() &&
instr->operands.size() &&
instr->operands[0].isTemp() &&
instr->operands[0].isFixed() &&
if (instr->isSDWA() || instr->isDPP() ||
(instr->isVOP3() && ctx.program->chip_class < GFX10) ||
- (instr->format == Format::VOP3P && ctx.program->chip_class < GFX10))
+ (instr->isVOP3P() && ctx.program->chip_class < GFX10))
return; /* some encodings can't ever take literals */
/* we do not apply the literals yet as we don't know if it is profitable */
unsigned num_operands = 1;
if (instr->isSALU() ||
(ctx.program->chip_class >= GFX10 &&
- (can_use_VOP3(ctx, instr) || instr->format == Format::VOP3P)))
+ (can_use_VOP3(ctx, instr) || instr->isVOP3P())))
num_operands = instr->operands.size();
/* catch VOP2 with a 3rd SGPR operand (e.g. v_cndmask_b32, v_addc_co_u32) */
else if (instr->isVALU() && instr->operands.size() >= 3)
fprintf(output, " bank_mask:0x%.1x", dpp->bank_mask);
if (dpp->bound_ctrl)
fprintf(output, " bound_ctrl:1");
- } else if ((int)instr->format & (int)Format::SDWA) {
+ } else if (instr->isSDWA()) {
const SDWA_instruction* sdwa = instr->sdwa();
switch (sdwa->omod) {
case 1:
bool *const neg = (bool *)alloca(instr->operands.size() * sizeof(bool));
bool *const opsel = (bool *)alloca(instr->operands.size() * sizeof(bool));
uint8_t *const sel = (uint8_t *)alloca(instr->operands.size() * sizeof(uint8_t));
- if ((int)instr->format & (int)Format::VOP3) {
+ if (instr->isVOP3()) {
const VOP3_instruction* vop3 = instr->vop3();
for (unsigned i = 0; i < instr->operands.size(); ++i) {
abs[i] = vop3->abs[i];
if (abs[i])
fprintf(output, "|");
- if (instr->format == Format::VOP3P) {
+ if (instr->isVOP3P()) {
const VOP3P_instruction* vop3 = instr->vop3p();
if ((vop3->opsel_lo & (1 << i)) || !(vop3->opsel_hi & (1 << i))) {
fprintf(output, ".%c%c",
/* v_readfirstlane_b32 cannot use SDWA */
if (instr->opcode == aco_opcode::p_as_uniform)
return 4;
- if (instr->format == Format::PSEUDO && chip >= GFX8)
+ if (instr->isPseudo() && chip >= GFX8)
return rc.bytes() % 2 == 0 ? 2 : 1;
if (instr->opcode == aco_opcode::v_cvt_f32_ubyte0) {
return rc.bytes() % 2 == 0 ? 2 : 1;
} else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, 1)) {
return 2;
- } else if (instr->format == Format::VOP3P) {
+ } else if (instr->isVOP3P()) {
return 2;
}
void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, unsigned byte, RegClass rc)
{
chip_class chip = ctx.program->chip_class;
- if (instr->format == Format::PSEUDO || byte == 0)
+ if (instr->isPseudo() || byte == 0)
return;
assert(rc.bytes() <= 2);
} else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) {
instr->vop3()->opsel |= (byte / 2) << idx;
return;
- } else if (instr->format == Format::VOP3P && byte == 2) {
+ } else if (instr->isVOP3P() && byte == 2) {
VOP3P_instruction* vop3p = instr->vop3p();
assert(!(vop3p->opsel_lo & (1 << idx)));
vop3p->opsel_lo |= 1 << idx;
{
chip_class chip = program->chip_class;
- if (instr->format == Format::PSEUDO && chip >= GFX8)
+ if (instr->isPseudo() && chip >= GFX8)
return std::make_pair(rc.bytes() % 2 == 0 ? 2 : 1, rc.bytes());
- else if (instr->format == Format::PSEUDO)
+ else if (instr->isPseudo())
return std::make_pair(4, rc.size() * 4u);
unsigned bytes_written = chip >= GFX10 ? rc.bytes() : 4u;
RegClass rc = instr->definitions[idx].regClass();
chip_class chip = program->chip_class;
- if (instr->format == Format::PSEUDO) {
+ if (instr->isPseudo()) {
return;
} else if (can_use_SDWA(chip, instr)) {
unsigned def_size = instr_info.definition_size[(int)instr->opcode];
else
get_reg_for_operand(ctx, register_file, parallelcopy, instr, operand, i);
- if (instr->format == Format::EXP ||
+ if (instr->isEXP() ||
(instr->isVMEM() && i == 3 && ctx.program->chip_class == GFX6) ||
- (instr->format == Format::DS && instr->ds()->gds)) {
+ (instr->isDS() && instr->ds()->gds)) {
for (unsigned j = 0; j < operand.size(); j++)
ctx.war_hint.set(operand.physReg().reg() + j);
}
} else if (instr->opcode == aco_opcode::s_addk_i32 ||
instr->opcode == aco_opcode::s_mulk_i32) {
instr->definitions[0].setFixed(instr->operands[0].physReg());
- } else if (instr->format == Format::MUBUF &&
+ } else if (instr->isMUBUF() &&
instr->definitions.size() == 1 &&
instr->operands.size() == 4) {
instr->definitions[0].setFixed(instr->operands[3].physReg());
- } else if (instr->format == Format::MIMG &&
+ } else if (instr->isMIMG() &&
instr->definitions.size() == 1 &&
!instr->operands[2].isUndefined()) {
instr->definitions[0].setFixed(instr->operands[2].physReg());
memory_sync_info get_sync_info_with_hack(const Instruction* instr)
{
memory_sync_info sync = get_sync_info(instr);
- if (instr->format == Format::SMEM && !instr->operands.empty() && instr->operands[0].bytes() == 16) {
+ if (instr->isSMEM() && !instr->operands.empty() && instr->operands[0].bytes() == 16) {
// FIXME: currently, it doesn't seem beneficial to omit this due to how our scheduler works
sync.storage = (storage_class)(sync.storage | storage_buffer);
sync.semantics = (memory_semantics)((sync.semantics | semantic_private) & ~semantic_can_reorder);
/* images and buffer/global memory can alias */ //TODO: more precisely, buffer images and buffer/global memory can alias
if (storage & (storage_buffer | storage_image))
storage |= storage_buffer | storage_image;
- if (instr->format == Format::SMEM)
+ if (instr->isSMEM())
query->aliasing_storage_smem |= storage;
else
query->aliasing_storage |= storage;
}
/* don't move exports so that they stay closer together */
- if (instr->format == Format::EXP)
+ if (instr->isEXP())
return hazard_fail_export;
/* don't move non-reorderable instructions */
return hazard_fail_barrier;
/* don't move memory loads/stores past potentially aliasing loads/stores */
- unsigned aliasing_storage = instr->format == Format::SMEM ?
+ unsigned aliasing_storage = instr->isSMEM() ?
query->aliasing_storage_smem :
query->aliasing_storage;
if ((sync.storage & aliasing_storage) && !(sync.semantics & semantic_can_reorder)) {
/* don't use LDS/GDS instructions to hide latency since it can
* significanly worsen LDS scheduling */
- if (candidate->format == Format::DS || !can_move_down) {
+ if (candidate->isDS() || !can_move_down) {
add_to_hazard_query(&hq, candidate.get());
ctx.mv.downwards_skip();
continue;
assert(candidate_idx == ctx.mv.source_idx);
assert(candidate_idx >= 0);
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
- bool is_vmem = candidate->isVMEM() || candidate->isFlatOrGlobal();
+ bool is_vmem = candidate->isVMEM() || candidate->isFlatLike();
/* break when encountering another VMEM instruction, logical_start or barriers */
if (candidate->opcode == aco_opcode::p_logical_start)
assert(candidate_idx == ctx.mv.source_idx);
assert(candidate_idx < (int) block->instructions.size());
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
- bool is_vmem = candidate->isVMEM() || candidate->isFlatOrGlobal();
+ bool is_vmem = candidate->isVMEM() || candidate->isFlatLike();
if (candidate->opcode == aco_opcode::p_logical_end)
break;
if (candidate->opcode == aco_opcode::p_logical_start)
break;
- if (candidate->isVMEM() || candidate->format == Format::SMEM || candidate->isFlatOrGlobal())
+ if (candidate->isVMEM() || candidate->isSMEM() || candidate->isFlatLike())
break;
HazardResult haz = perform_hazard_query(&hq, candidate.get(), false);
for (unsigned idx = 0; idx < block->instructions.size(); idx++) {
Instruction* current = block->instructions[idx].get();
- if (block->kind & block_kind_export_end && current->format == Format::EXP) {
+ if (block->kind & block_kind_export_end && current->isEXP()) {
unsigned target = current->exp()->dest;
if (target >= V_008DFC_SQ_EXP_POS && target < V_008DFC_SQ_EXP_PRIM) {
ctx.mv.current = current;
if (current->definitions.empty())
continue;
- if (current->isVMEM() || current->isFlatOrGlobal()) {
+ if (current->isVMEM() || current->isFlatLike()) {
ctx.mv.current = current;
schedule_VMEM(ctx, block, live_vars.register_demand[block->index], current, idx);
}
- if (current->format == Format::SMEM) {
+ if (current->isSMEM()) {
ctx.mv.current = current;
schedule_SMEM(ctx, block, live_vars.register_demand[block->index], current, idx);
}
if (instr->format != Format::VOP1 && instr->format != Format::SOP1 && instr->format != Format::PSEUDO && instr->format != Format::SOPK)
return false;
/* TODO: pseudo-instruction rematerialization is only supported for p_create_vector/p_parallelcopy */
- if (instr->format == Format::PSEUDO && instr->opcode != aco_opcode::p_create_vector &&
+ if (instr->isPseudo() && instr->opcode != aco_opcode::p_create_vector &&
instr->opcode != aco_opcode::p_parallelcopy)
return false;
- if (instr->format == Format::SOPK && instr->opcode != aco_opcode::s_movk_i32)
+ if (instr->isSOPK() && instr->opcode != aco_opcode::s_movk_i32)
return false;
for (const Operand& op : instr->operands) {
std::map<Temp, remat_info>::iterator remat = ctx.remat.find(tmp);
if (remat != ctx.remat.end()) {
Instruction *instr = remat->second.instr;
- assert((instr->format == Format::VOP1 || instr->format == Format::SOP1 || instr->format == Format::PSEUDO || instr->format == Format::SOPK) && "unsupported");
+ assert((instr->isVOP1() || instr->isSOP1() || instr->isPseudo() || instr->isSOPK()) && "unsupported");
assert((instr->format != Format::PSEUDO || instr->opcode == aco_opcode::p_create_vector || instr->opcode == aco_opcode::p_parallelcopy) && "unsupported");
assert(instr->definitions.size() == 1 && "unsupported");
aco_ptr<Instruction> res;
- if (instr->format == Format::VOP1) {
+ if (instr->isVOP1()) {
res.reset(create_instruction<VOP1_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
- } else if (instr->format == Format::SOP1) {
+ } else if (instr->isSOP1()) {
res.reset(create_instruction<SOP1_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
- } else if (instr->format == Format::PSEUDO) {
+ } else if (instr->isPseudo()) {
res.reset(create_instruction<Pseudo_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
- } else if (instr->format == Format::SOPK) {
+ } else if (instr->isSOPK()) {
res.reset(create_instruction<SOPK_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
res->sopk()->imm = instr->sopk()->imm;
}
Block& block = ctx.program->blocks[entry.first];
std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.end();
--it;
- assert((*it)->format == Format::PSEUDO_BRANCH);
+ assert((*it)->isBranch());
aco_ptr<Pseudo_instruction> pc{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, entry.second.size(), entry.second.size())};
unsigned i = 0;
for (std::pair<Definition, Operand>& pair : entry.second)
ctx.program->blocks[succ_idx].linear_preds[i] = pred->index;
Pseudo_branch_instruction *branch = pred->instructions.back()->branch();
- assert(branch->format == Format::PSEUDO_BRANCH);
+ assert(branch->isBranch());
branch->target[0] = succ_idx;
branch->target[1] = succ_idx;
}
program->statistics[statistic_instructions] += block.instructions.size();
for (aco_ptr<Instruction>& instr : block.instructions) {
- if (instr->format == Format::SOPP && instr->sopp()->block != -1)
+ if (instr->isSOPP() && instr->sopp()->block != -1)
program->statistics[statistic_branches]++;
if (instr->opcode == aco_opcode::p_constaddr)
vmem_clause_res.clear();
}
- if (instr->format == Format::SMEM && !instr->operands.empty()) {
+ if (instr->isSMEM() && !instr->operands.empty()) {
if (instr->operands[0].size() == 2)
smem_clause_res.insert(Temp(0, s2));
else
check(base_format == instr_info.format[(int)instr->opcode], "Wrong base format for instruction", instr.get());
/* check VOP3 modifiers */
- if (((uint32_t)instr->format & (uint32_t)Format::VOP3) && instr->format != Format::VOP3) {
+ if (instr->isVOP3() && instr->format != Format::VOP3) {
check(base_format == Format::VOP2 ||
base_format == Format::VOP1 ||
base_format == Format::VOPC ||
/* check for undefs */
for (unsigned i = 0; i < instr->operands.size(); i++) {
if (instr->operands[i].isUndefined()) {
- bool flat = instr->format == Format::FLAT || instr->format == Format::SCRATCH || instr->format == Format::GLOBAL;
- bool can_be_undef = is_phi(instr) || instr->format == Format::EXP ||
- instr->format == Format::PSEUDO_REDUCTION ||
+ bool flat = instr->isFlatLike();
+ bool can_be_undef = is_phi(instr) || instr->isEXP() ||
+ instr->isReduction() ||
instr->opcode == aco_opcode::p_create_vector ||
- (flat && i == 1) || (instr->format == Format::MIMG && (i == 1 || i == 2)) ||
- ((instr->format == Format::MUBUF || instr->format == Format::MTBUF) && i == 1);
+ (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
+ ((instr->isMUBUF() || instr->isMTBUF()) && i == 1);
check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
} else {
check(instr->operands[i].isFixed() || instr->operands[i].isTemp() || instr->operands[i].isConstant(), "Uninitialized Operand", instr.get());
/* check subdword definitions */
for (unsigned i = 0; i < instr->definitions.size(); i++) {
if (instr->definitions[i].regClass().is_subdword())
- check(instr->format == Format::PSEUDO || instr->definitions[i].bytes() <= 4, "Only Pseudo instructions can write subdword registers larger than 4 bytes", instr.get());
+ check(instr->isPseudo() || instr->definitions[i].bytes() <= 4, "Only Pseudo instructions can write subdword registers larger than 4 bytes", instr.get());
}
if (instr->isSALU() || instr->isVALU()) {
if (!op.isLiteral())
continue;
- check(instr->format == Format::SOP1 ||
- instr->format == Format::SOP2 ||
- instr->format == Format::SOPC ||
- instr->format == Format::VOP1 ||
- instr->format == Format::VOP2 ||
- instr->format == Format::VOPC ||
+ check(instr->isSOP1() || instr->isSOP2() || instr->isSOPC() ||
+ instr->isVOP1() || instr->isVOP2() || instr->isVOPC() ||
(instr->isVOP3() && program->chip_class >= GFX10) ||
- (instr->format == Format::VOP3P && program->chip_class >= GFX10),
+ (instr->isVOP3P() && program->chip_class >= GFX10),
"Literal applied on wrong instruction format", instr.get());
check(literal.isUndefined() || (literal.size() == op.size() && literal.constantValue() == op.constantValue()), "Only 1 Literal allowed", instr.get());
literal = op;
- check(instr->isSALU() || instr->isVOP3() || instr->format == Format::VOP3P || i == 0 || i == 2, "Wrong source position for Literal argument", instr.get());
+ check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2, "Wrong source position for Literal argument", instr.get());
}
/* check num sgprs for VALU */
if (program->chip_class >= GFX10 && !is_shift64)
const_bus_limit = 2;
- uint32_t scalar_mask = instr->isVOP3() || instr->format == Format::VOP3P ? 0x7 : 0x5;
+ uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;
if (instr->isSDWA())
scalar_mask = program->chip_class >= GFX9 ? 0x7 : 0x4;
- if ((int) instr->format & (int) Format::VOPC ||
+ if (instr->isVOPC() ||
instr->opcode == aco_opcode::v_readfirstlane_b32 ||
instr->opcode == aco_opcode::v_readlane_b32 ||
instr->opcode == aco_opcode::v_readlane_b32_e64) {
check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit, "Too many SGPRs/literals", instr.get());
}
- if (instr->format == Format::SOP1 || instr->format == Format::SOP2) {
+ if (instr->isSOP1() || instr->isSOP2()) {
check(instr->definitions[0].getTemp().type() == RegType::sgpr, "Wrong Definition type for SALU instruction", instr.get());
for (const Operand& op : instr->operands) {
check(op.isConstant() || op.regClass().type() <= RegType::sgpr,
if (instr->opcode == aco_opcode::p_as_uniform)
return byte == 0;
- if (instr->format == Format::PSEUDO && chip >= GFX8)
+ if (instr->isPseudo() && chip >= GFX8)
return true;
if (instr->isSDWA() && (instr->sdwa()->sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes()))
return true;
Definition def = instr->definitions[0];
unsigned byte = def.physReg().byte();
- if (instr->format == Format::PSEUDO && chip >= GFX8)
+ if (instr->isPseudo() && chip >= GFX8)
return true;
if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes()))
return true;
chip_class chip = program->chip_class;
Definition def = instr->definitions[index];
- if (instr->format == Format::PSEUDO)
+ if (instr->isPseudo())
return chip >= GFX8 ? def.bytes() : def.size() * 4u;
if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes()))
return def.bytes();