From 7c83aa0518988a3b2bc2bc6bf74d808db86982d1 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 26 Jan 2021 19:52:50 -0800 Subject: [PATCH] intel/fs: Emit better code for u2u of extract Emitting the instructions one by one results in two MOV instructions that won't be propagated. By handling both instructions at once, a single MOV is emitted. For example, on Ice Lake this helps dEQP-VK.spirv_assembly.type.vec3.i8.bitwise_xor_frag: SIMD8 shader: 49 instructions. 1 loops. 4044 cycles. 0:0 spills:fills, 5 sends SIMD8 shader: 41 instructions. 1 loops. 3804 cycles. 0:0 spills:fills, 5 sends Without "intel/fs: Allow copy propagation between MOVs of mixed sizes," the improvement is still 8 instructions, but there are more instructions to begin with: SIMD8 shader: 52 instructions. 1 loops. 4164 cycles. 0:0 spills:fills, 5 sends SIMD8 shader: 44 instructions. 1 loops. 3944 cycles. 0:0 spills:fills, 5 sends Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/compiler/brw_fs_nir.cpp | 42 +++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index cf71e22..2bc8ea4 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1102,13 +1102,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, case nir_op_f2i32: case nir_op_f2u32: case nir_op_i2f16: - case nir_op_i2i16: case nir_op_u2f16: - case nir_op_u2u16: case nir_op_f2i16: case nir_op_f2u16: - case nir_op_i2i8: - case nir_op_u2u8: case nir_op_f2i8: case nir_op_f2u8: if (result.type == BRW_REGISTER_TYPE_B || @@ -1124,6 +1120,44 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, inst = bld.MOV(result, op[0]); break; + case nir_op_i2i8: + case nir_op_u2u8: + assert(type_sz(op[0].type) < 8); /* brw_nir_lower_conversions */ + FALLTHROUGH; + case nir_op_i2i16: + case nir_op_u2u16: { + /* Emit better code for u2u8(extract_u8(a, b)) and similar patterns. + * Emitting the instructions one by one results in two MOV instructions + * that won't be propagated. By handling both instructions here, a + * single MOV is emitted. + */ + nir_alu_instr *extract_instr = nir_src_as_alu_instr(instr->src[0].src); + if (extract_instr != NULL) { + if (extract_instr->op == nir_op_extract_u8 || + extract_instr->op == nir_op_extract_i8) { + prepare_alu_destination_and_sources(bld, extract_instr, op, false); + + const unsigned byte = nir_src_as_uint(extract_instr->src[1].src); + const brw_reg_type type = + brw_int_type(1, extract_instr->op == nir_op_extract_i8); + + op[0] = subscript(op[0], type, byte); + } else if (extract_instr->op == nir_op_extract_u16 || + extract_instr->op == nir_op_extract_i16) { + prepare_alu_destination_and_sources(bld, extract_instr, op, false); + + const unsigned word = nir_src_as_uint(extract_instr->src[1].src); + const brw_reg_type type = + brw_int_type(2, extract_instr->op == nir_op_extract_i16); + + op[0] = subscript(op[0], type, word); + } + } + + inst = bld.MOV(result, op[0]); + break; + } + case nir_op_fsat: inst = bld.MOV(result, op[0]); inst->saturate = true; -- 2.7.4