ac/nir,radv: add 1 dword to ES/GS item size
author Qiang Yu <yuq825@gmail.com>
Wed, 31 May 2023 08:33:34 +0000 (16:33 +0800)
committer Marge Bot <emma+marge@anholt.net>
Fri, 9 Jun 2023 02:05:20 +0000 (02:05 +0000)
This reduces LDS bank conflicts and aligns with radeonsi,
so we don't assume LDS accesses are 16-byte aligned for both
drivers.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23314>

src/amd/common/ac_nir_lower_esgs_io_to_mem.c
src/amd/vulkan/radv_shader_info.c

index 45b6417..badeb07 100644 (file)
@@ -166,8 +166,7 @@ lower_es_output_store(nir_builder *b,
       /* GFX9+: ES is merged into GS, data is passed through LDS. */
       nir_ssa_def *vertex_idx = nir_build_load_local_invocation_index(b);
       nir_ssa_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, st->esgs_itemsize), io_off);
-      nir_build_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask,
-                             .align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
+      nir_build_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask);
    }
 
    nir_instr_remove(instr);
@@ -273,8 +272,7 @@ lower_gs_per_vertex_input_load(nir_builder *b,
    nir_ssa_def *off = gs_per_vertex_input_offset(b, st, intrin);
 
    if (st->gfx_level >= GFX9)
-      return nir_build_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off,
-                                   .align_mul = 16u, .align_offset = (nir_intrinsic_component(intrin) * 4u) % 16u);
+      return nir_build_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off);
 
    unsigned wave_size = 64u; /* GFX6-8 only support wave64 */
    nir_ssa_def *ring = nir_build_load_ring_esgs_amd(b);
index ce7f435..42880c4 100644 (file)
@@ -1496,6 +1496,12 @@ radv_link_shaders_info(struct radv_device *device,
 
          /* Compute the ESGS item size for VS or TES as ES. */
          producer->info.esgs_itemsize = num_outputs_written * 16;
+
+          /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
+           * conflicts, i.e. each vertex will start on a different bank.
+           */
+         if (device->physical_device->rad_info.gfx_level >= GFX9 && producer->info.esgs_itemsize)
+            producer->info.esgs_itemsize += 4;
       }
 
       /* Compute NGG info (GFX10+) or GS info. */