From 357e5fac9953b26eedc8819ab528b981be6e1b69 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Mon, 12 Dec 2022 14:02:03 +0100 Subject: [PATCH] r600/sfn: Use variable length DOT on Evergreen and Cayman Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp | 63 ++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index 65f526b..e0b8ba3 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -469,7 +469,8 @@ AluInstr::set_sources(SrcValues src) uint8_t AluInstr::allowed_src_chan_mask() const { if (m_alu_slots < 2) - return 0xf; + return 0xf; + int chan_use_count[4] = {0}; for (auto s : m_src) { @@ -1273,6 +1274,8 @@ emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader); static bool emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader); static bool +emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader); +static bool emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader); static bool @@ -1554,11 +1557,17 @@ AluInstr::from_nir(nir_alu_instr *alu, Shader& shader) case nir_op_fdph: return emit_fdph(*alu, shader); case nir_op_fdot2: - return emit_dot(*alu, 2, shader); + if (shader.chip_class() >= ISA_CC_EVERGREEN) + return emit_dot(*alu, 2, shader); + else + return emit_dot4(*alu, 2, shader); case nir_op_fdot3: - return emit_dot(*alu, 3, shader); + if (shader.chip_class() >= ISA_CC_EVERGREEN) + return emit_dot(*alu, 3, shader); + else + return emit_dot4(*alu, 3, shader); case nir_op_fdot4: - return emit_dot(*alu, 4, shader); + return emit_dot4(*alu, 4, shader); case nir_op_feq32: case nir_op_feq: @@ -2497,18 +2506,54 @@ emit_dot(const nir_alu_instr& alu, int n, Shader& shader) const nir_alu_src& src0 = alu.src[0]; const nir_alu_src& src1 = alu.src[1]; - auto dest = value_factory.dest(alu.dest.dest, 0, pin_free); + auto dest = value_factory.dest(alu.dest.dest, 0, pin_chan); - AluInstr::SrcValues srcs(8); + AluInstr::SrcValues srcs(2 * n); for (int i = 0; i < n; ++i) { srcs[2 * i] = value_factory.src(src0, i); srcs[2 * i + 1] = value_factory.src(src1, i); } - for (int i = n; i < 4; ++i) { - srcs[2 * i] = value_factory.zero(); - srcs[2 * i + 1] = value_factory.zero(); + auto op = + unlikely(shader.has_flag(Shader::sh_legacy_math_rules)) ? op2_dot : op2_dot_ieee; + AluInstr *ir = new AluInstr(op, dest, srcs, AluInstr::last_write, n); + + if (src0.negate) + ir->set_alu_flag(alu_src0_neg); + if (src0.abs) + ir->set_alu_flag(alu_src0_abs); + if (src1.negate) + ir->set_alu_flag(alu_src1_neg); + if (src1.abs) + ir->set_alu_flag(alu_src1_abs); + + if (alu.dest.saturate) + ir->set_alu_flag(alu_dst_clamp); + + shader.emit_instruction(ir); + return true; +} + +static bool +emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader) +{ + auto& value_factory = shader.value_factory(); + const nir_alu_src& src0 = alu.src[0]; + const nir_alu_src& src1 = alu.src[1]; + + auto dest = value_factory.dest(alu.dest.dest, 0, pin_free); + + AluInstr::SrcValues srcs(8); + + for (int i = 0; i < nelm; ++i) { + srcs[2 * i] = value_factory.src(src0, i); + srcs[2 * i + 1] = value_factory.src(src1, i); + } + + for (int i = nelm; i < 4; ++i) { + srcs[2 * i] = value_factory.zero(); + srcs[2 * i + 1] = value_factory.zero(); } auto op = -- 2.7.4