return res;
}
+/* Emit a three-source integer dot-product instruction as VOP3P.
+ *
+ * All three NIR ALU sources are fetched in order. The first source that lives
+ * in an SGPR is kept as is; every source fetched after it is passed through
+ * as_vgpr(), so at most one operand of the emitted instruction is an SGPR.
+ *
+ * ctx   - instruction-selection context providing the program and the block.
+ * instr - NIR ALU instruction supplying the sources and the `exact` flag.
+ * op    - opcode handed to the VOP3P builder.
+ * dst   - temporary that receives the result.
+ * clamp - value written to the emitted instruction's clamp bit.
+ */
+void
+emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, bool clamp)
+{
+   Temp operands[3] = {Temp(0, v1), Temp(0, v1), Temp(0, v1)};
+   bool sgpr_seen = false;
+   for (unsigned idx = 0; idx < 3; ++idx) {
+      Temp fetched = get_alu_src(ctx, instr->src[idx]);
+      if (!sgpr_seen) {
+         /* Remember the first SGPR operand; it is allowed to stay scalar. */
+         sgpr_seen = fetched.type() == RegType::sgpr;
+      } else {
+         /* An SGPR was already accepted, so this operand goes through as_vgpr(). */
+         fetched = as_vgpr(ctx, fetched);
+      }
+      operands[idx] = fetched;
+   }
+
+   Builder bld(ctx->program, ctx->block);
+   bld.is_precise = instr->exact;
+
+   /* NOTE(review): the trailing 0x0 / 0x7 are presumably the opsel_lo / opsel_hi
+    * masks of the VOP3P builder - confirm against the Builder definition. */
+   auto emitted = bld.vop3p(op, Definition(dst), operands[0], operands[1], operands[2], 0x0, 0x7);
+   emitted.instr->vop3p().clamp = clamp;
+}
+
void
emit_vop1_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst)
{
}
break;
}
+ case nir_op_sdot_4x8_iadd: {
+ emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, false);
+ break;
+ }
+ case nir_op_sdot_4x8_iadd_sat: {
+ emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, true);
+ break;
+ }
+ case nir_op_udot_4x8_uadd: {
+ emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_u32_u8, dst, false);
+ break;
+ }
+ case nir_op_udot_4x8_uadd_sat: {
+ emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_u32_u8, dst, true);
+ break;
+ }
+ case nir_op_sdot_2x16_iadd: {
+ emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_i32_i16, dst, false);
+ break;
+ }
+ case nir_op_sdot_2x16_iadd_sat: {
+ emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_i32_i16, dst, true);
+ break;
+ }
+ case nir_op_udot_2x16_uadd: {
+ emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_u32_u16, dst, false);
+ break;
+ }
+ case nir_op_udot_2x16_uadd_sat: {
+ emit_idot_instruction(ctx, instr, aco_opcode::v_dot2_u32_u16, dst, true);
+ break;
+ }
case nir_op_cube_face_coord_amd: {
Temp in = get_alu_src(ctx, instr->src[0], 3);
Temp src[3] = {emit_extract_vector(ctx, in, 0, v1), emit_extract_vector(ctx, in, 1, v1),
case nir_op_cube_face_index_amd:
case nir_op_cube_face_coord_amd:
case nir_op_sad_u8x4:
- case nir_op_iadd_sat: type = RegType::vgpr; break;
+ case nir_op_iadd_sat:
+ case nir_op_udot_4x8_uadd:
+ case nir_op_sdot_4x8_iadd:
+ case nir_op_udot_4x8_uadd_sat:
+ case nir_op_sdot_4x8_iadd_sat:
+ case nir_op_udot_2x16_uadd:
+ case nir_op_sdot_2x16_iadd:
+ case nir_op_udot_2x16_uadd_sat:
+ case nir_op_sdot_2x16_iadd_sat: type = RegType::vgpr; break;
case nir_op_f2i16:
case nir_op_f2u16:
case nir_op_f2i32:
(0x0a, 0x0a, 0x07, 0x07, 0x0a, "v_mul_hi_i32_i24", False),
(0x0b, 0x0b, 0x08, 0x08, 0x0b, "v_mul_u32_u24", False),
(0x0c, 0x0c, 0x09, 0x09, 0x0c, "v_mul_hi_u32_u24", False),
+ ( -1, -1, -1, 0x39, 0x0d, "v_dot4c_i32_i8", False),
(0x0d, 0x0d, -1, -1, -1, "v_min_legacy_f32", True),
(0x0e, 0x0e, -1, -1, -1, "v_max_legacy_f32", True),
(0x0f, 0x0f, 0x0a, 0x0a, 0x0f, "v_min_f32", True),
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, -1, code, code, name)
for (code, name, modifiers) in VOPP:
opcode(name, -1, code, code, Format.VOP3P, InstrClass.Valu32, modifiers, modifiers)
+opcode("v_dot2_i32_i16", -1, 0x26, 0x14, Format.VOP3P, InstrClass.Valu32)
+opcode("v_dot2_u32_u16", -1, 0x27, 0x15, Format.VOP3P, InstrClass.Valu32)
+opcode("v_dot4_i32_i8", -1, 0x28, 0x16, Format.VOP3P, InstrClass.Valu32)
+opcode("v_dot4_u32_u8", -1, 0x29, 0x17, Format.VOP3P, InstrClass.Valu32)
# VINTERP instructions:
instr->opcode == aco_opcode::v_mad_f16 ||
instr->opcode == aco_opcode::v_mad_legacy_f16 ||
(instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10) ||
- (instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10)) &&
+ (instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10) ||
+ (instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
instr->operands[2].getTemp().type() == RegType::vgpr && instr->operands[1].isTemp() &&
instr->operands[1].getTemp().type() == RegType::vgpr && !instr->usesModifiers() &&
case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break;
case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
+ case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
default: break;
}
}
instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 ||
instr->opcode == aco_opcode::v_pk_fmac_f16 ||
instr->opcode == aco_opcode::v_writelane_b32 ||
- instr->opcode == aco_opcode::v_writelane_b32_e64) {
+ instr->opcode == aco_opcode::v_writelane_b32_e64 ||
+ instr->opcode == aco_opcode::v_dot4c_i32_i8) {
instr->definitions[0].setFixed(instr->operands[2].physReg());
} else if (instr->opcode == aco_opcode::s_addk_i32 ||
instr->opcode == aco_opcode::s_mulk_i32) {