From b6d9e45f473edf4a3cfa86963b1849365f2297b1 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Tue, 19 May 2020 11:53:44 +0100
Subject: [PATCH] aco: improve code for f2{i,u}{8,16}
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Use sub-dword definitions so that the RA can use SDWA

No fossil-db changes.

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 38bf449..0b59a7e 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2327,33 +2327,31 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
    case nir_op_f2i8:
    case nir_op_f2i16: {
       Temp src = get_alu_src(ctx, instr->src[0]);
+      Temp tmp = dst.type() == RegType::vgpr ? dst : bld.tmp(v1);
       if (instr->src[0].src.ssa->bit_size == 16)
-         src = bld.vop1(aco_opcode::v_cvt_i16_f16, bld.def(v1), src);
+         src = bld.vop1(aco_opcode::v_cvt_i16_f16, Definition(tmp), src);
       else if (instr->src[0].src.ssa->bit_size == 32)
-         src = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), src);
+         src = bld.vop1(aco_opcode::v_cvt_i32_f32, Definition(tmp), src);
       else
-         src = bld.vop1(aco_opcode::v_cvt_i32_f64, bld.def(v1), src);
+         src = bld.vop1(aco_opcode::v_cvt_i32_f64, Definition(tmp), src);
 
-      if (dst.type() == RegType::vgpr)
-         bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
-      else
+      if (dst.type() != RegType::vgpr)
          bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
       break;
    }
    case nir_op_f2u8:
    case nir_op_f2u16: {
       Temp src = get_alu_src(ctx, instr->src[0]);
+      Temp tmp = dst.type() == RegType::vgpr ? dst : bld.tmp(v1);
       if (instr->src[0].src.ssa->bit_size == 16)
-         src = bld.vop1(aco_opcode::v_cvt_u16_f16, bld.def(v1), src);
+         bld.vop1(aco_opcode::v_cvt_u16_f16, Definition(tmp), src);
       else if (instr->src[0].src.ssa->bit_size == 32)
-         src = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), src);
+         bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(tmp), src);
       else
-         src = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), src);
+         bld.vop1(aco_opcode::v_cvt_u32_f64, Definition(tmp), src);
 
-      if (dst.type() == RegType::vgpr)
-         bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
-      else
-         bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
+      if (dst.type() != RegType::vgpr)
+         bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
       break;
    }
    case nir_op_f2i32: {
-- 
2.7.4