intel/fs: Delete manual 'inst->mlen' calculations from all uses of logical URB writes.
author: Francisco Jerez <currojerez@riseup.net>
Wed, 28 Sep 2022 23:50:41 +0000 (16:50 -0700)
committer: Marge Bot <emma+marge@anholt.net>
Wed, 27 Sep 2023 23:57:25 +0000 (23:57 +0000)
Rework:
 * Marcin: update emit_urb_indirect_vec4_write

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25195>

src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_fs_nir.cpp
src/intel/compiler/brw_fs_validate.cpp
src/intel/compiler/brw_fs_visitor.cpp
src/intel/compiler/brw_lower_logical_sends.cpp
src/intel/compiler/brw_mesh.cpp

index bcad2e5..ca96949 100644 (file)
@@ -1595,7 +1595,6 @@ fs_visitor::emit_gs_thread_end()
       srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(0);
       inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
                        srcs, ARRAY_SIZE(srcs));
-      inst->mlen = 1;
    } else {
       fs_reg srcs[URB_LOGICAL_NUM_SRCS];
       srcs[URB_LOGICAL_SRC_HANDLE] = gs_payload().urb_handles;
@@ -1603,7 +1602,6 @@ fs_visitor::emit_gs_thread_end()
       srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
       inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
                        srcs, ARRAY_SIZE(srcs));
-      inst->mlen = 2;
    }
    inst->eot = true;
    inst->offset = 0;
@@ -7085,7 +7083,6 @@ fs_visitor::emit_tcs_thread_end()
    srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(1);
    fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
                             reg_undef, srcs, ARRAY_SIZE(srcs));
-   inst->mlen = 3;
    inst->eot = true;
 }
 
index 9f3417b..80993d4 100644 (file)
@@ -2266,13 +2266,8 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
       fwa_bld.SHL(channel_mask, channel_mask, brw_imm_ud(16u));
    }
 
-   /* Store the control data bits in the message payload and send it. */
-   const unsigned header_size = 1 + unsigned(channel_mask.file != BAD_FILE) +
-      unsigned(per_slot_offset.file != BAD_FILE);
-
    /* If there are channel masks, add 3 extra copies of the data. */
    const unsigned length = 1 + 3 * unsigned(channel_mask.file != BAD_FILE);
-
    fs_reg sources[4];
 
    for (unsigned i = 0; i < ARRAY_SIZE(sources); i++)
@@ -2288,7 +2283,7 @@ fs_visitor::emit_gs_control_data_bits(const fs_reg &vertex_count)
 
    fs_inst *inst = abld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
                              srcs, ARRAY_SIZE(srcs));
-   inst->mlen = header_size + length;
+
    /* We need to increment Global Offset by 256-bits to make room for
     * Broadwell's extra "Vertex Count" payload at the beginning of the
     * URB entry.  Since this is an OWord message, Global Offset is counted
@@ -2956,22 +2951,17 @@ fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
 
       assert(has_urb_lsc || m == (first_component + num_components));
 
-      unsigned header_size = 1 + unsigned(indirect_offset.file != BAD_FILE) +
-         unsigned(mask != WRITEMASK_XYZW);
-      const unsigned length = m;
-
       fs_reg srcs[URB_LOGICAL_NUM_SRCS];
       srcs[URB_LOGICAL_SRC_HANDLE] = tcs_payload().patch_urb_output;
       srcs[URB_LOGICAL_SRC_PER_SLOT_OFFSETS] = indirect_offset;
       srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = mask_reg;
-      srcs[URB_LOGICAL_SRC_DATA] = bld.vgrf(BRW_REGISTER_TYPE_F, length);
-      srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(length);
-      bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, length, 0);
+      srcs[URB_LOGICAL_SRC_DATA] = bld.vgrf(BRW_REGISTER_TYPE_F, m);
+      srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(m);
+      bld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], sources, m, 0);
 
       fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
                                srcs, ARRAY_SIZE(srcs));
       inst->offset = imm_offset;
-      inst->mlen = header_size + length;
       break;
    }
 
index 56ce471..35cf439 100644 (file)
@@ -92,21 +92,6 @@ fs_visitor::validate()
 #ifndef NDEBUG
    foreach_block_and_inst (block, fs_inst, inst, cfg) {
       switch (inst->opcode) {
-      case SHADER_OPCODE_URB_WRITE_LOGICAL: {
-         const unsigned header_size = 1 +
-            unsigned(inst->src[URB_LOGICAL_SRC_PER_SLOT_OFFSETS].file != BAD_FILE) +
-            unsigned(inst->src[URB_LOGICAL_SRC_CHANNEL_MASK].file != BAD_FILE);
-
-         unsigned data_size = 0;
-         for (unsigned i = header_size, j = 0; i < inst->mlen; i++, j++) {
-            fsv_assert_eq(type_sz(offset(inst->src[URB_LOGICAL_SRC_DATA], bld, j).type), 4);
-            data_size++;
-         }
-
-         fsv_assert_eq(header_size + data_size, inst->mlen);
-         break;
-      }
-
       case SHADER_OPCODE_SEND:
          fsv_assert(is_uniform(inst->src[0]) && is_uniform(inst->src[1]));
          break;
index 1cdb8dd..6e2c4a3 100644 (file)
@@ -949,7 +949,6 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
       unreachable("invalid stage");
    }
 
-   int header_size = 1;
    fs_reg per_slot_offsets;
 
    if (stage == MESA_SHADER_GEOMETRY) {
@@ -964,12 +963,6 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
       if (gs_prog_data->static_vertex_count == -1)
          starting_urb_offset += 2;
 
-      /* We also need to use per-slot offsets.  The per-slot offset is the
-       * Vertex Count.  SIMD8 mode processes 8 different primitives at a
-       * time; each may output a different number of vertices.
-       */
-      header_size++;
-
       /* The URB offset is in 128-bit units, so we need to multiply by 2 */
       const int output_vertex_size_owords =
          gs_prog_data->output_vertex_size_hwords * 2;
@@ -1130,7 +1123,6 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
          else
             inst->eot = slot == last_slot && stage != MESA_SHADER_GEOMETRY;
 
-         inst->mlen = length + header_size;
          inst->offset = urb_offset;
          urb_offset = starting_urb_offset + slot + 1;
          length = 0;
@@ -1172,7 +1164,6 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
       fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL, reg_undef,
                                srcs, ARRAY_SIZE(srcs));
       inst->eot = true;
-      inst->mlen = 2;
       inst->offset = 1;
       return;
    }
@@ -1225,7 +1216,6 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
       fs_inst *inst = bld.exec_all().emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
                                           reg_undef, srcs, ARRAY_SIZE(srcs));
       inst->eot = true;
-      inst->mlen = 6;
       inst->offset = 0;
    }
 }
index 8e50df1..e665f82 100644 (file)
@@ -206,6 +206,7 @@ lower_urb_write_logical_send_xe2(const fs_builder &bld, fs_inst *inst)
    const fs_reg handle = inst->src[URB_LOGICAL_SRC_HANDLE];
    const fs_reg src = inst->components_read(URB_LOGICAL_SRC_DATA) ?
       inst->src[URB_LOGICAL_SRC_DATA] : fs_reg(brw_imm_ud(0));
+   assert(type_sz(src.type) == 4);
 
    /* Calculate the total number of components of the payload. */
    const unsigned src_comps = MAX2(1, inst->components_read(URB_LOGICAL_SRC_DATA));
index 867a215..1486dc3 100644 (file)
@@ -1665,7 +1665,6 @@ emit_urb_direct_vec4_write(const fs_builder &bld,
 
       fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
                                 reg_undef, srcs, ARRAY_SIZE(srcs));
-      inst->mlen = 2 + length;
       inst->offset = urb_global_offset;
       assert(inst->offset < 2048);
    }
@@ -1741,7 +1740,6 @@ emit_urb_indirect_vec4_write(const fs_builder &bld,
 
       fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
                                 reg_undef, srcs, ARRAY_SIZE(srcs));
-      inst->mlen = 3 + length;
       inst->offset = 0;
    }
 }
@@ -1828,7 +1826,6 @@ emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
 
          fs_inst *inst = bld8.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
                                    reg_undef, srcs, ARRAY_SIZE(srcs));
-         inst->mlen = 3 + length;
          inst->offset = 0;
       }
    }