return false;
}
-/* v_and(a, not(b)) -> v_bfi_b32(b, 0, a) */
+/* v_and(a, not(b)) -> v_bfi_b32(b, 0, a)
+ * v_or(a, not(b)) -> v_bfi_b32(b, a, -1)
+ */
bool
-combine_v_and_not(opt_ctx& ctx, aco_ptr<Instruction>& instr)
+combine_v_andor_not(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
if (instr->usesModifiers())
return false;
Operand::zero(),
instr->operands[!i],
};
+ if (instr->opcode == aco_opcode::v_or_b32) {
+ ops[1] = instr->operands[!i];
+ ops[2] = Operand::c32(-1);
+ }
if (!check_vop3_operands(ctx, 3, ops))
continue;
Instruction* new_instr =
create_instruction<VALU_instruction>(aco_opcode::v_bfi_b32, Format::VOP3, 3, 1);
- new_instr->operands[0] = copy_operand(ctx, op_instr->operands[0]);
- new_instr->operands[1] = Operand::zero();
- new_instr->operands[2] = instr->operands[!i];
+ if (op_instr->operands[0].isTemp())
+ ctx.uses[op_instr->operands[0].tempId()]++;
+ for (unsigned j = 0; j < 3; j++)
+ new_instr->operands[j] = ops[j];
new_instr->definitions[0] = instr->definitions[0];
new_instr->pass_flags = instr->pass_flags;
instr.reset(new_instr);
} else if (combine_three_valu_op(ctx, instr, aco_opcode::v_or_b32, aco_opcode::v_or3_b32,
"012", 1 | 2)) {
} else if (combine_add_or_then_and_lshl(ctx, instr)) {
+ } else if (combine_v_andor_not(ctx, instr)) {
}
} else if (instr->opcode == aco_opcode::v_xor_b32 && ctx.program->gfx_level >= GFX10) {
if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xor3_b32, "012",
combine_sabsdiff(ctx, instr);
} else if (instr->opcode == aco_opcode::v_and_b32) {
if (combine_and_subbrev(ctx, instr)) {
- } else if (combine_v_and_not(ctx, instr)) {
+ } else if (combine_v_andor_not(ctx, instr)) {
}
} else if (instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) {
/* set existing v_fma_f32 with label_mad so we can create v_fmamk_f32/v_fmaak_f32.