uint8_t AluInstr::allowed_src_chan_mask() const
{
if (m_alu_slots < 2)
- return 0xf;
+ return 0xf;
+
int chan_use_count[4] = {0};
for (auto s : m_src) {
static bool
emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader);
static bool
+emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader);
+static bool
emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader);
static bool
case nir_op_fdph:
return emit_fdph(*alu, shader);
case nir_op_fdot2:
- return emit_dot(*alu, 2, shader);
+ if (shader.chip_class() >= ISA_CC_EVERGREEN)
+ return emit_dot(*alu, 2, shader);
+ else
+ return emit_dot4(*alu, 2, shader);
case nir_op_fdot3:
- return emit_dot(*alu, 3, shader);
+ if (shader.chip_class() >= ISA_CC_EVERGREEN)
+ return emit_dot(*alu, 3, shader);
+ else
+ return emit_dot4(*alu, 3, shader);
case nir_op_fdot4:
- return emit_dot(*alu, 4, shader);
+ return emit_dot4(*alu, 4, shader);
case nir_op_feq32:
case nir_op_feq:
const nir_alu_src& src0 = alu.src[0];
const nir_alu_src& src1 = alu.src[1];
- auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
+ auto dest = value_factory.dest(alu.dest.dest, 0, pin_chan);
- AluInstr::SrcValues srcs(8);
+ AluInstr::SrcValues srcs(2 * n);
for (int i = 0; i < n; ++i) {
srcs[2 * i] = value_factory.src(src0, i);
srcs[2 * i + 1] = value_factory.src(src1, i);
}
- for (int i = n; i < 4; ++i) {
- srcs[2 * i] = value_factory.zero();
- srcs[2 * i + 1] = value_factory.zero();
+ auto op =
+ unlikely(shader.has_flag(Shader::sh_legacy_math_rules)) ? op2_dot : op2_dot_ieee;
+ AluInstr *ir = new AluInstr(op, dest, srcs, AluInstr::last_write, n);
+
+ if (src0.negate)
+ ir->set_alu_flag(alu_src0_neg);
+ if (src0.abs)
+ ir->set_alu_flag(alu_src0_abs);
+ if (src1.negate)
+ ir->set_alu_flag(alu_src1_neg);
+ if (src1.abs)
+ ir->set_alu_flag(alu_src1_abs);
+
+ if (alu.dest.saturate)
+ ir->set_alu_flag(alu_dst_clamp);
+
+ shader.emit_instruction(ir);
+ return true;
+}
+
+static bool
+emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader)
+{
+ auto& value_factory = shader.value_factory();
+ const nir_alu_src& src0 = alu.src[0];
+ const nir_alu_src& src1 = alu.src[1];
+
+ auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
+
+ AluInstr::SrcValues srcs(8);
+
+ for (int i = 0; i < nelm; ++i) {
+ srcs[2 * i] = value_factory.src(src0, i);
+ srcs[2 * i + 1] = value_factory.src(src1, i);
+ }
+
+ for (int i = nelm; i < 4; ++i) {
+ srcs[2 * i] = value_factory.zero();
+ srcs[2 * i + 1] = value_factory.zero();
}
auto op =