From 34a2c9ce358017faf2f095e886f7de1ce83cc97c Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 28 Sep 2022 16:38:35 -0700 Subject: [PATCH] intel/fs: Specify number of data components of logical URB writes via control immediate. This is what most logical SEND messages do when they take a variable number of components. 'inst->mlen' is expected to be zero for logical SEND opcodes, which are expected to behave like plain arithmetic operations, so certain automated transformations (like SIMD lowering) can manipulate them without opcode-specific special-casing. Guessing the number of components from 'inst->mlen' has other disadvantages, because it requires duplicating the logic that infers the message payload size in every use of the instruction -- Instead we can just do the computation once during logical send lowering. In addition on LNL platform this causes the 'inst->mlen' field of URB writes to have units inconsistent with every other SEND instruction, which is likely to lead to confusion and bugs down the road. Rework: * Marcin: update emit_urb_indirect_vec4_write Reviewed-by: Ian Romanick Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_eu_defines.h | 2 +- src/intel/compiler/brw_fs.cpp | 9 ++++++--- src/intel/compiler/brw_fs_nir.cpp | 2 ++ src/intel/compiler/brw_fs_visitor.cpp | 3 +++ src/intel/compiler/brw_lower_logical_sends.cpp | 12 ++++++++---- src/intel/compiler/brw_mesh.cpp | 3 +++ 6 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 8a95142..b01c221 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -982,7 +982,7 @@ enum urb_logical_srcs { URB_LOGICAL_SRC_CHANNEL_MASK, /** Data to be written. BAD_FILE for reads. */ URB_LOGICAL_SRC_DATA, - + URB_LOGICAL_SRC_COMPONENTS, URB_LOGICAL_NUM_SRCS }; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index c5d7c1f..bcad2e5 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -843,10 +843,10 @@ fs_inst::components_read(unsigned i) const return (i == 0 ? 2 : 1); case SHADER_OPCODE_URB_WRITE_LOGICAL: + assert(src[URB_LOGICAL_SRC_COMPONENTS].file == IMM); + if (i == URB_LOGICAL_SRC_DATA) - return mlen - 1 - - unsigned(src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS].file != BAD_FILE) - - unsigned(src[URB_LOGICAL_SRC_CHANNEL_MASK].file != BAD_FILE); + return src[URB_LOGICAL_SRC_COMPONENTS].ud; else return 1; @@ -1592,6 +1592,7 @@ fs_visitor::emit_gs_thread_end() fs_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles; + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(0); inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, srcs, ARRAY_SIZE(srcs)); inst->mlen = 1; @@ -1599,6 +1600,7 @@ fs_visitor::emit_gs_thread_end() fs_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles; srcs[URB_LOGICAL_SRC_DATA] = this->final_gs_vertex_count; + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1); inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, srcs, ARRAY_SIZE(srcs)); inst->mlen = 2; @@ -7080,6 +7082,7 @@ fs_visitor::emit_tcs_thread_end() srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(WRITEMASK_X << 16); srcs[URB_LOGICAL_SRC_DATA] = brw_imm_ud(0); + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1); fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, srcs, ARRAY_SIZE(srcs)); inst->mlen = 3; diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 37f8763..9f3417b 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -2283,6 +2283,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count) srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = per_slot_offset; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = channel_mask; srcs[URB_LOGICAL_SRC_DATA] = bld.vgrf(BRW_REGISTER_TYPE_F, length); + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0); fs_inst *inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, @@ -2964,6 +2965,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld, srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask_reg; srcs[URB_LOGICAL_SRC_DATA] = bld.vgrf(BRW_REGISTER_TYPE_F, length); + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0); fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index a05f218..1cdb8dd 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -1118,6 +1118,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, alloc.allocate((dispatch_width / 8) * length), BRW_REGISTER_TYPE_F); + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); abld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0); fs_inst *inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, @@ -1166,6 +1167,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) fs_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle; srcs[URB_LOGICAL_SRC_DATA] = payload; + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1); fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, srcs, ARRAY_SIZE(srcs)); @@ -1218,6 +1220,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) srcs[URB_LOGICAL_SRC_HANDLE] = uniform_urb_handle; srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = uniform_mask; srcs[URB_LOGICAL_SRC_DATA] = payload; + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(4); fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef, srcs, ARRAY_SIZE(srcs)); diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 27a688e..8e50df1 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -150,8 +150,11 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst) assert(inst->header_size == 0); - fs_reg *payload_sources = new fs_reg[inst->mlen]; - fs_reg payload = fs_reg(VGRF, bld.shader->alloc.allocate(inst->mlen), + const unsigned length = 1 + per_slot_present + channel_mask_present + + inst->components_read(URB_LOGICAL_SRC_DATA); + + fs_reg *payload_sources = new fs_reg[length]; + fs_reg payload = fs_reg(VGRF, bld.shader->alloc.allocate(length), BRW_REGISTER_TYPE_F); unsigned header_size = 0; @@ -162,10 +165,10 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst) if (channel_mask_present) payload_sources[header_size++] = inst->src[URB_LOGICAL_SRC_CHANNEL_MASK]; - for (unsigned i = header_size, j = 0; i < inst->mlen; i++, j++) + for (unsigned i = header_size, j = 0; i < length; i++, j++) payload_sources[i] = offset(inst->src[URB_LOGICAL_SRC_DATA], bld, j); - bld.LOAD_PAYLOAD(payload, payload_sources, inst->mlen, header_size); + bld.LOAD_PAYLOAD(payload, payload_sources, length, header_size); delete [] payload_sources; @@ -180,6 +183,7 @@ lower_urb_write_logical_send(const fs_builder &bld, fs_inst *inst) channel_mask_present, inst->offset); + inst->mlen = length; inst->ex_desc = 0; inst->ex_mlen = 0; inst->send_has_side_effects = true; diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp index 1e4b3a5..867a215 100644 --- a/src/intel/compiler/brw_mesh.cpp +++ b/src/intel/compiler/brw_mesh.cpp @@ -1660,6 +1660,7 @@ emit_urb_direct_vec4_write(const fs_builder &bld, srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, bld.shader->alloc.allocate(length), BRW_REGISTER_TYPE_F); + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, @@ -1735,6 +1736,7 @@ emit_urb_indirect_vec4_write(const fs_builder &bld, srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16); srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, bld.shader->alloc.allocate(length), BRW_REGISTER_TYPE_F); + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, @@ -1821,6 +1823,7 @@ emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr, srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask; srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, bld.shader->alloc.allocate(length), BRW_REGISTER_TYPE_F); + srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length); bld8.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, length, 0); fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, -- 2.7.4