int subvector_begin_pos = -1;
};
-static uint32_t
-get_sdwa_sel(unsigned sel, PhysReg reg)
-{
- if (sel & sdwa_isra) {
- unsigned size = sdwa_rasize & sel;
- if (size == 1)
- return reg.byte();
- else /* size == 2 */
- return sdwa_isword | (reg.byte() >> 1);
- }
- return sel & sdwa_asuint;
-}
-
unsigned
get_mimg_nsa_dwords(const Instruction* instr)
{
}
encoding |= (sdwa.clamp ? 1 : 0) << 13;
} else {
- encoding |= get_sdwa_sel(sdwa.dst_sel, instr->definitions[0].physReg()) << 8;
- uint32_t dst_u = sdwa.dst_sel & sdwa_sext ? 1 : 0;
- if (sdwa.dst_preserve || (sdwa.dst_sel & sdwa_isra))
+ encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8;
+ uint32_t dst_u = sdwa.dst_sel.sign_extend() ? 1 : 0;
+ if (sdwa.dst_preserve)
dst_u = 2;
encoding |= dst_u << 11;
encoding |= (sdwa.clamp ? 1 : 0) << 13;
encoding |= sdwa.omod << 14;
}
- encoding |= get_sdwa_sel(sdwa.sel[0], sdwa_op.physReg()) << 16;
- encoding |= sdwa.sel[0] & sdwa_sext ? 1 << 19 : 0;
+ encoding |= sdwa.sel[0].to_sdwa_sel(sdwa_op.physReg().byte()) << 16;
+ encoding |= sdwa.sel[0].sign_extend() ? 1 << 19 : 0;
encoding |= sdwa.abs[0] << 21;
encoding |= sdwa.neg[0] << 20;
if (instr->operands.size() >= 2) {
- encoding |= get_sdwa_sel(sdwa.sel[1], instr->operands[1].physReg()) << 24;
- encoding |= sdwa.sel[1] & sdwa_sext ? 1 << 27 : 0;
+ encoding |= sdwa.sel[1].to_sdwa_sel(instr->operands[1].physReg().byte()) << 24;
+ encoding |= sdwa.sel[1].sign_extend() ? 1 << 27 : 0;
encoding |= sdwa.abs[1] << 29;
encoding |= sdwa.neg[1] << 28;
}
create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
sdwa->operands[0] = Operand(src);
sdwa->definitions[0] = Definition(tmp);
- if (sign_extend)
- sdwa->sel[0] = src_bits == 8 ? sdwa_sbyte : sdwa_sword;
- else
- sdwa->sel[0] = src_bits == 8 ? sdwa_ubyte : sdwa_uword;
- sdwa->dst_sel = tmp.bytes() == 2 ? sdwa_uword : sdwa_udword;
+ sdwa->sel[0] = SubdwordSel(src_bits / 8, 0, sign_extend);
+ sdwa->dst_sel = tmp.bytes() == 2 ? SubdwordSel::uword : SubdwordSel::dword;
+ sdwa->dst_preserve = tmp.bytes() == 2;
bld.insert(std::move(sdwa));
} else {
assert(src_bits < 32);
if (i >= 2)
break;
- switch (instr->operands[i].bytes()) {
- case 1: sdwa.sel[i] = sdwa_ubyte; break;
- case 2: sdwa.sel[i] = sdwa_uword; break;
- case 4: sdwa.sel[i] = sdwa_udword; break;
- }
- }
- switch (instr->definitions[0].bytes()) {
- case 1:
- sdwa.dst_sel = sdwa_ubyte;
- sdwa.dst_preserve = true;
- break;
- case 2:
- sdwa.dst_sel = sdwa_uword;
- sdwa.dst_preserve = true;
- break;
- case 4: sdwa.dst_sel = sdwa_udword; break;
+ sdwa.sel[i] = SubdwordSel(instr->operands[i].bytes(), 0, false);
}
+ sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false);
+ sdwa.dst_preserve = sdwa.dst_sel.size() < 4;
+
if (instr->definitions[0].getTemp().type() == RegType::sgpr && chip == GFX8)
instr->definitions[0].setFixed(vcc);
if (instr->definitions.size() >= 2)
};
static_assert(sizeof(DPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
-enum sdwa_sel : uint8_t {
- /* masks */
- sdwa_wordnum = 0x1,
- sdwa_bytenum = 0x3,
- sdwa_asuint = 0x7 | 0x10,
- sdwa_rasize = 0x3,
-
- /* flags */
- sdwa_isword = 0x4,
- sdwa_sext = 0x8,
- sdwa_isra = 0x10,
-
- /* specific values */
- sdwa_ubyte0 = 0,
- sdwa_ubyte1 = 1,
- sdwa_ubyte2 = 2,
- sdwa_ubyte3 = 3,
- sdwa_uword0 = sdwa_isword | 0,
- sdwa_uword1 = sdwa_isword | 1,
- sdwa_udword = 6,
-
- sdwa_sbyte0 = sdwa_ubyte0 | sdwa_sext,
- sdwa_sbyte1 = sdwa_ubyte1 | sdwa_sext,
- sdwa_sbyte2 = sdwa_ubyte2 | sdwa_sext,
- sdwa_sbyte3 = sdwa_ubyte3 | sdwa_sext,
- sdwa_sword0 = sdwa_uword0 | sdwa_sext,
- sdwa_sword1 = sdwa_uword1 | sdwa_sext,
- sdwa_sdword = sdwa_udword | sdwa_sext,
-
- /* register-allocated */
- sdwa_ubyte = 1 | sdwa_isra,
- sdwa_uword = 2 | sdwa_isra,
- sdwa_sbyte = sdwa_ubyte | sdwa_sext,
- sdwa_sword = sdwa_uword | sdwa_sext,
+struct SubdwordSel { /* Describes which part of a dword an SDWA operand or definition selects: size, byte offset and sign-extension. */
+ enum sdwa_sel : uint8_t { /* Bit layout: [1:0] byte offset, [4:2] size in bytes, [5] sign-extend. */
+ ubyte = 0x4, /* size 1 (1 << 2), offset 0 */
+ uword = 0x8, /* size 2 (2 << 2), offset 0 */
+ dword = 0x10, /* size 4 (4 << 2), offset 0 */
+ sext = 0x20, /* sign-extend flag, OR-ed onto a size/offset value */
+ sbyte = ubyte | sext,
+ sword = uword | sext,
+
+ ubyte0 = ubyte, /* the numeric suffix of the byte variants is the byte offset */
+ ubyte1 = ubyte | 1,
+ ubyte2 = ubyte | 2,
+ ubyte3 = ubyte | 3,
+ sbyte0 = sbyte,
+ sbyte1 = sbyte | 1,
+ sbyte2 = sbyte | 2,
+ sbyte3 = sbyte | 3,
+ uword0 = uword,
+ uword1 = uword | 2, /* offsets are stored in bytes, so word 1 is encoded as offset 2 */
+ sword0 = sword,
+ sword1 = sword | 2,
+ };
+
+ SubdwordSel() : sel((sdwa_sel)0) {} /* empty selection: size() is 0 and the bool conversion yields false */
+ constexpr SubdwordSel(sdwa_sel sel_) : sel(sel_) {} /* implicit on purpose so enumerators can be assigned directly */
+ constexpr SubdwordSel(unsigned size, unsigned offset, bool sign_extend) /* size and offset are in bytes */
+ : sel((sdwa_sel)((sign_extend ? sext : 0) | size << 2 | offset)) /* no range checking: offset is expected to fit in two bits */
+ {}
+ constexpr operator sdwa_sel() const { return sel; } /* allows comparing against the enumerators */
+ explicit operator bool() const { return sel != 0; } /* true for anything but the default-constructed value */
+
+ constexpr unsigned size() const { return (sel >> 2) & 0x7; } /* selected size in bytes */
+ constexpr unsigned offset() const { return sel & 0x3; } /* byte offset of the selection */
+ constexpr bool sign_extend() const { return sel & sext; }
+ constexpr unsigned to_sdwa_sel(unsigned reg_byte_offset) const /* Converts to the number the assembler shifts into the SDWA encoding word. */
+ {
+ reg_byte_offset += offset(); /* combine the caller-supplied register byte with the stored offset */
+ if (size() == 1)
+ return reg_byte_offset; /* byte selection: the byte index itself */
+ else if (size() == 2)
+ return 4 + (reg_byte_offset >> 1); /* word selection: 4 plus the word index */
+ else
+ return 6; /* every other size is emitted as 6, the whole-dword value */
+ }
+
+private:
+ sdwa_sel sel;
};
/**
struct SDWA_instruction : public Instruction {
/* these destination modifiers aren't available with VOPC except for
* clamp on GFX8 */
- uint8_t sel[2];
- uint8_t dst_sel;
+ SubdwordSel sel[2];
+ SubdwordSel dst_sel;
bool neg[2];
bool abs[2];
bool dst_preserve : 1;
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
- if (reduce_op == imin8 || reduce_op == imax8)
- sdwa->sel[0] = sdwa_sbyte;
- else
- sdwa->sel[0] = sdwa_ubyte;
- sdwa->dst_sel = sdwa_udword;
+ bool sext = reduce_op == imin8 || reduce_op == imax8;
+ sdwa->sel[0] = SubdwordSel(1, 0, sext);
+ sdwa->dst_sel = SubdwordSel::dword;
bld.insert(std::move(sdwa));
} else {
aco_opcode opcode;
aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
- if (reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16)
- sdwa->sel[0] = sdwa_sword;
- else
- sdwa->sel[0] = sdwa_uword;
- sdwa->dst_sel = sdwa_udword;
+ bool sext = reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16;
+ sdwa->sel[0] = SubdwordSel(2, 0, sext);
+ sdwa->dst_sel = SubdwordSel::dword;
bld.insert(std::move(sdwa));
} else if (ctx->program->chip_class == GFX6 || ctx->program->chip_class == GFX7) {
aco_opcode opcode;
sdwa->operands[0] = Operand(op.physReg().advance(-op.physReg().byte()),
RegClass::get(op.regClass().type(), 4));
sdwa->definitions[0] = dst;
- sdwa->sel[0] = sdwa_ubyte0 + op.physReg().byte() + index;
- if (signext)
- sdwa->sel[0] |= sdwa_sext;
- sdwa->dst_sel = sdwa_uword;
+ sdwa->sel[0] = SubdwordSel(1, op.physReg().byte() + offset / 8, signext);
+ sdwa->dst_sel = SubdwordSel::uword;
+ sdwa->dst_preserve = true;
bld.insert(std::move(sdwa));
}
break;
(Format)((uint16_t)Format::VOP1 | (uint16_t)Format::SDWA), 1, 1)};
sdwa->operands[0] = op;
sdwa->definitions[0] = dst;
- sdwa->sel[0] = sdwa_udword;
- sdwa->dst_sel = (bits == 8 ? sdwa_ubyte0 : sdwa_uword0) + (offset / bits);
+ sdwa->sel[0] = SubdwordSel::dword;
+ sdwa->dst_sel = SubdwordSel(bits / 8, offset / 8, false);
bld.insert(std::move(sdwa));
} else {
bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
RegClass::get(op.regClass().type(), 4));
bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), sdwa_op)
.instr->sdwa()
- .sel[1] = sdwa_ubyte0 + op.physReg().byte();
+ .sel[1] = SubdwordSel(1, op.physReg().byte(), false);
}
break;
}
res = ''
if self == Format.SDWA:
for i in range(min(num_operands, 2)):
- res += 'instr->sel[{0}] = op{0}.op.bytes() == 2 ? sdwa_uword : (op{0}.op.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'.format(i)
- res += 'instr->dst_sel = def0.bytes() == 2 ? sdwa_uword : (def0.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'
+ res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
+ res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
res += 'if (def0.bytes() < 4) instr->dst_preserve = true;'
return res
return op.isFixed() && op.physReg() == exec;
}
-int
+SubdwordSel /* Returns the selection read by a p_extract, or by a p_insert whose operand 1 is 0; empty otherwise. */
parse_extract(Instruction* instr)
{
if (instr->opcode == aco_opcode::p_extract) {
- bool is_byte = instr->operands[2].constantEquals(8);
- unsigned index = instr->operands[1].constantValue();
- unsigned sel = (is_byte ? sdwa_ubyte0 : sdwa_uword0) + index;
- if (!instr->operands[3].constantEquals(0))
- sel |= sdwa_sext;
- return sel;
+ unsigned size = instr->operands[2].constantValue() / 8; /* operand 2 divided by 8 is used as the size in bytes */
+ unsigned offset = instr->operands[1].constantValue() * size; /* operand 1 counts elements of that size; scale it to a byte offset */
+ bool sext = instr->operands[3].constantEquals(1); /* only a constant 1 in operand 3 requests sign-extension */
+ return SubdwordSel(size, offset, sext);
} else if (instr->opcode == aco_opcode::p_insert && instr->operands[1].constantEquals(0)) {
- return instr->operands[2].constantEquals(8) ? sdwa_ubyte0 : sdwa_uword0;
+ return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword; /* unsigned byte or word at offset 0 */
} else {
- return -1;
+ return SubdwordSel(); /* empty selection, which tests false, replaces the old -1 sentinel */
}
}
-int
+SubdwordSel /* Returns the selection written by a p_insert, or by an unsigned p_extract whose operand 1 is 0; empty otherwise. */
parse_insert(Instruction* instr)
{
if (instr->opcode == aco_opcode::p_extract && instr->operands[3].constantEquals(0) &&
instr->operands[1].constantEquals(0)) {
- return instr->operands[2].constantEquals(8) ? sdwa_ubyte0 : sdwa_uword0;
+ return instr->operands[2].constantEquals(8) ? SubdwordSel::ubyte : SubdwordSel::uword; /* unsigned byte or word at offset 0 */
} else if (instr->opcode == aco_opcode::p_insert) {
- bool is_byte = instr->operands[2].constantEquals(8);
- unsigned index = instr->operands[1].constantValue();
- unsigned sel = (is_byte ? sdwa_ubyte0 : sdwa_uword0) + index;
- return sel;
+ unsigned size = instr->operands[2].constantValue() / 8; /* operand 2 divided by 8 is used as the size in bytes */
+ unsigned offset = instr->operands[1].constantValue() * size; /* operand 1 counts elements of that size; scale it to a byte offset */
+ return SubdwordSel(size, offset, false); /* an insert selection is never sign-extended */
} else {
- return -1;
+ return SubdwordSel(); /* empty selection, which tests false, replaces the old -1 sentinel */
}
}
return false;
Temp tmp = info.instr->operands[0].getTemp();
- unsigned sel = parse_extract(info.instr);
+ SubdwordSel sel = parse_extract(info.instr);
- if (sel == sdwa_udword || sel == sdwa_sdword) {
+ if (!sel) {
+ return false;
+ } else if (sel.size() == 4) {
return true;
- } else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel <= sdwa_ubyte3) {
+ } else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) {
return true;
} else if (can_use_SDWA(ctx.program->chip_class, instr, true) &&
(tmp.type() == RegType::vgpr || ctx.program->chip_class >= GFX9)) {
- if (instr->isSDWA() &&
- (static_cast<SDWA_instruction*>(instr.get())->sel[idx] & sdwa_asuint) != sdwa_udword)
+ if (instr->isSDWA() && instr->sdwa().sel[idx] != SubdwordSel::dword)
return false;
return true;
- } else if (instr->isVOP3() && (sel & sdwa_isword) &&
- can_use_opsel(ctx.program->chip_class, instr->opcode, idx, (sel & sdwa_wordnum)) &&
+ } else if (instr->isVOP3() && sel.size() == 2 &&
+ can_use_opsel(ctx.program->chip_class, instr->opcode, idx, sel.offset()) &&
!(instr->vop3().opsel & (1 << idx))) {
return true;
} else {
apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info)
{
Temp tmp = info.instr->operands[0].getTemp();
- unsigned sel = parse_extract(info.instr);
-
- if (sel == sdwa_udword || sel == sdwa_sdword) {
- } else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel <= sdwa_ubyte3) {
- switch (sel) {
- case sdwa_ubyte0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break;
- case sdwa_ubyte1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break;
- case sdwa_ubyte2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break;
- case sdwa_ubyte3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break;
+ SubdwordSel sel = parse_extract(info.instr);
+ assert(sel);
+
+ if (sel.size() == 4) {
+ /* full dword selection */
+ } else if (instr->opcode == aco_opcode::v_cvt_f32_u32 && sel.size() == 1 && !sel.sign_extend()) {
+ switch (sel.offset()) {
+ case 0: instr->opcode = aco_opcode::v_cvt_f32_ubyte0; break;
+ case 1: instr->opcode = aco_opcode::v_cvt_f32_ubyte1; break;
+ case 2: instr->opcode = aco_opcode::v_cvt_f32_ubyte2; break;
+ case 3: instr->opcode = aco_opcode::v_cvt_f32_ubyte3; break;
}
} else if (can_use_SDWA(ctx.program->chip_class, instr, true) &&
(tmp.type() == RegType::vgpr || ctx.program->chip_class >= GFX9)) {
to_SDWA(ctx, instr);
static_cast<SDWA_instruction*>(instr.get())->sel[idx] = sel;
} else if (instr->isVOP3()) {
- if (sel & sdwa_wordnum)
+ if (sel.offset())
instr->vop3().opsel |= 1 << idx;
}
can_use_mod = can_use_mod && instr_info.can_use_input_modifiers[(int)instr->opcode];
if (instr->isSDWA())
- can_use_mod = can_use_mod && (instr->sdwa().sel[i] & sdwa_asuint) == sdwa_udword;
+ can_use_mod = can_use_mod && instr->sdwa().sel[i].size() == 4;
else
can_use_mod = can_use_mod && (instr->isDPP() || can_use_VOP3(ctx, instr));
case aco_opcode::p_extract: {
if (instr->definitions[0].bytes() == 4) {
ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
- if (instr->operands[0].regClass() == v1 && parse_insert(instr.get()) >= 0)
+ if (instr->operands[0].regClass() == v1 && parse_insert(instr.get()))
ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
}
break;
if (instr->operands[0].bytes() == 4) {
if (instr->operands[0].regClass() == v1)
ctx.info[instr->operands[0].tempId()].set_insert(instr.get());
- if (parse_extract(instr.get()) >= 0)
+ if (parse_extract(instr.get()))
ctx.info[instr->definitions[0].tempId()].set_extract(instr.get());
ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
}
/* MADs/FMAs are created later, so we don't have to update the original add */
assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
- unsigned sel = parse_insert(def_info.instr);
+ SubdwordSel sel = parse_insert(def_info.instr);
+ assert(sel);
- if (instr->isVOP3() && (sel & sdwa_isword) && !(sel & sdwa_sext) &&
- can_use_opsel(ctx.program->chip_class, instr->opcode, 3, (sel & sdwa_wordnum))) {
+ if (instr->isVOP3() && sel.size() == 2 && !sel.sign_extend() &&
+ can_use_opsel(ctx.program->chip_class, instr->opcode, 3, sel.offset())) {
if (instr->vop3().opsel & (1 << 3))
return false;
- if (sel & sdwa_wordnum)
+ if (sel.offset())
instr->vop3().opsel |= 1 << 3;
} else {
if (!can_use_SDWA(ctx.program->chip_class, instr, true))
return false;
to_SDWA(ctx, instr);
- if ((static_cast<SDWA_instruction*>(instr.get())->dst_sel & sdwa_asuint) != sdwa_udword)
+ if (instr->sdwa().dst_sel.size() != 4)
return false;
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
}
fprintf(output, " clamp");
if (instr->isVOPC())
return;
- switch (sdwa.dst_sel & sdwa_asuint) {
- case sdwa_udword: break;
- case sdwa_ubyte0:
- case sdwa_ubyte1:
- case sdwa_ubyte2:
- case sdwa_ubyte3:
- fprintf(output, " dst_sel:%sbyte%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
- sdwa.dst_sel & sdwa_bytenum);
- break;
- case sdwa_uword0:
- case sdwa_uword1:
- fprintf(output, " dst_sel:%sword%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
- sdwa.dst_sel & sdwa_wordnum);
- break;
+ if (instr->definitions[0].bytes() == 4) {
+ char sext = sdwa.dst_sel.sign_extend() ? 's' : 'u';
+ switch (sdwa.dst_sel.size()) {
+ case 1: fprintf(output, " dst_sel:%cbyte%u", sext, sdwa.dst_sel.offset()); break;
+ case 2: fprintf(output, " dst_sel:%cword%u", sext, sdwa.dst_sel.offset() >> 1); break;
+ case 4: break;
+ default: break;
+ }
}
if (sdwa.dst_preserve)
fprintf(output, " dst_preserve");
bool* const abs = (bool*)alloca(instr->operands.size() * sizeof(bool));
bool* const neg = (bool*)alloca(instr->operands.size() * sizeof(bool));
bool* const opsel = (bool*)alloca(instr->operands.size() * sizeof(bool));
- uint8_t* const sel = (uint8_t*)alloca(instr->operands.size() * sizeof(uint8_t));
+ SubdwordSel* const sel = (SubdwordSel*)alloca(instr->operands.size() * sizeof(SubdwordSel));
for (unsigned i = 0; i < instr->operands.size(); ++i) {
abs[i] = false;
neg[i] = false;
opsel[i] = false;
- sel[i] = sdwa_udword;
+ sel[i] = SubdwordSel::dword;
}
if (instr->isVOP3()) {
const VOP3_instruction& vop3 = instr->vop3();
abs[i] = vop3.abs[i];
neg[i] = vop3.neg[i];
opsel[i] = vop3.opsel & (1 << i);
- sel[i] = sdwa_udword;
}
} else if (instr->isDPP()) {
const DPP_instruction& dpp = instr->dpp();
abs[i] = dpp.abs[i];
neg[i] = dpp.neg[i];
opsel[i] = false;
- sel[i] = sdwa_udword;
}
} else if (instr->isSDWA()) {
const SDWA_instruction& sdwa = instr->sdwa();
fprintf(output, "|");
if (opsel[i])
fprintf(output, "hi(");
- else if (sel[i] & sdwa_sext)
+ else if (sel[i].sign_extend())
fprintf(output, "sext(");
aco_print_operand(&instr->operands[i], output, flags);
- if (opsel[i] || (sel[i] & sdwa_sext))
+ if (opsel[i] || (sel[i].sign_extend()))
fprintf(output, ")");
- if (!(sel[i] & sdwa_isra)) {
- if (sel[i] == sdwa_udword || sel[i] == sdwa_sdword) {
- /* print nothing */
- } else if (sel[i] & sdwa_isword) {
- unsigned index = sel[i] & sdwa_wordnum;
- fprintf(output, "[%u:%u]", index * 16, index * 16 + 15);
- } else {
- unsigned index = sel[i] & sdwa_bytenum;
- fprintf(output, "[%u:%u]", index * 8, index * 8 + 7);
- }
+ if (instr->isSDWA() && i < 2 && sel[i].size() < 4 && instr->operands[i].bytes() == 4) {
+ unsigned begin = sel[i].offset() * 8;
+ unsigned end = begin + sel[i].size() * 8 - 1;
+ fprintf(output, "[%u:%u]", begin, end);
}
if (abs[i])
fprintf(output, "|");
check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
program->chip_class >= GFX9,
"SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
+ } else {
+ const Definition& def = instr->definitions[0];
+ check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes",
+ instr.get());
+ check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(),
+ "SDWA definition selection size must be at most definition size", instr.get());
+ check(
+ sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4,
+ "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
+ check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
+ instr.get());
+ check(def.bytes() == 4 || sdwa.dst_preserve,
+ "SDWA subdword definition needs dst_preserve", instr.get());
}
for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
const Operand& op = instr->operands[i];
check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
- if (sdwa.sel[i] & sdwa_isra)
- check(op.bytes() >= (sdwa.sel[i] & sdwa_rasize),
- "SDWA selection size must be at most operand size", instr.get());
- else
- check(op.bytes() == 4, "SDWA selection needs dword operand", instr.get());
+ check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(),
+ "SDWA operand selection size must be at most operand size", instr.get());
+ check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4,
+ "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
+ check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset",
+ instr.get());
}
if (instr->operands.size() >= 3) {
check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
(instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
-
- if (instr->definitions[0].regClass().is_subdword())
- check((sdwa.dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()),
- "Unexpected SDWA sel for sub-dword definition", instr.get());
}
/* check opsel */
return byte == 0;
if (instr->isPseudo() && chip >= GFX8)
return true;
- if (instr->isSDWA()) {
- unsigned size = instr->sdwa().sel[index] & sdwa_rasize;
- return byte % size == 0;
- }
+ if (instr->isSDWA())
+ return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
+ byte % instr->sdwa().sel[index].size() == 0;
if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1))
return true;
if (instr->isPseudo() && chip >= GFX8)
return true;
- if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
- return true;
+ if (instr->isSDWA())
+ return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
+ byte % instr->sdwa().dst_sel.size() == 0;
if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1))
return true;
return chip >= GFX8 ? def.bytes() : def.size() * 4u;
if (instr->isVALU()) {
assert(def.bytes() <= 2);
-
- if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
- return def.bytes();
+ if (instr->isSDWA())
+ return instr->sdwa().dst_sel.size();
if (instr_is_16bit(chip, instr->opcode))
return 2;
sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
sdwa->dst_preserve = true;
- sdwa->dst_sel = sdwa_ubyte0;
+ sdwa->dst_sel = SubdwordSel::ubyte0;
sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
- sdwa->sel[0] = sdwa_sbyte2;
- sdwa->sel[1] = sdwa_uword1;
+ sdwa->sel[0] = SubdwordSel::sbyte2;
+ sdwa->sel[1] = SubdwordSel::uword1;
finish_validator_test();
}
//>> p_unit_test 4
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
- //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(0:7)
+ //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(0:7) dst_preserve
EXT(0, 0)
- //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(16:23)
+ //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(16:23) dst_preserve
if (i != GFX7)
EXT(0, 2)
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
- //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(8:15)
+ //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(8:15) dst_preserve
EXT(1, 0)
- //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(24:31)
+ //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(24:31) dst_preserve
if (i != GFX7)
EXT(1, 2)