radv/llvm,aco: always split typed vertex buffer loads on GFX6 and GFX10+

author Samuel Pitoiset <samuel.pitoiset@gmail.com>

Tue, 24 Nov 2020 13:56:55 +0000 (14:56 +0100)

committer Marge Bot <eric+marge@anholt.net>

Tue, 1 Dec 2020 10:14:27 +0000 (10:14 +0000)
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 24 Nov 2020 13:56:55 +0000 (14:56 +0100)
committer Marge Bot <eric+marge@anholt.net>
Tue, 1 Dec 2020 10:14:27 +0000 (10:14 +0000)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp

index b5e49c266e2cf67a8c2b43f9446a8b698a198650..58bffb8b70b4628156eeabea8042e2359ee55e98 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -4608,11 +4608,17 @@ void visit_load_interpolated_input(isel_context *ctx, nir_intrinsic_instr *instr
  bool check_vertex_fetch_size(isel_context *ctx, const ac_data_format_info *vtx_info,
                               unsigned offset, unsigned stride, unsigned channels)
  {
-   unsigned vertex_byte_size = vtx_info->chan_byte_size * channels;
     if (vtx_info->chan_byte_size != 4 && channels == 3)
        return false;
+
+   /* Always split typed vertex buffer loads on GFX6 and GFX10+ to avoid any
+    * alignment issues that trigger memory violations and eventually a GPU
+    * hang. This can happen if the stride (static or dynamic) is unaligned, or
+    * if the VBO offset is only aligned to a scalar (e.g. stride is 8 and VBO
+    * offset is 2 for R16G16B16A16_SNORM).
+    */
     return (ctx->options->chip_class >= GFX7 && ctx->options->chip_class <= GFX9) ||
-          (offset % vertex_byte_size == 0 && stride % vertex_byte_size == 0);
+          (channels == 1);
  }
  
  uint8_t get_fetch_data_format(isel_context *ctx, const ac_data_format_info *vtx_info,
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c

index 1b982e8e6b3b2a9b14bfe38f39103ce283aa5bd2..c443a329adf8edeacc7011506ded322822721085 100644 (file)
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -1184,17 +1184,15 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
                 t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
                 t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
  
-               /* Perform per-channel vertex fetch operations if unaligned
-                * access are detected. Only GFX6 and GFX10 are affected.
+               /* Always split typed vertex buffer loads on GFX6 and GFX10+
+                * to avoid any alignment issues that trigger memory
+                * violations and eventually a GPU hang. This can happen if
+                * the stride (static or dynamic) is unaligned, or if the
+                * VBO offset is only aligned to a scalar (e.g. stride is 8 and
+                * VBO offset is 2 for R16G16B16A16_SNORM).
                  */
-               bool unaligned_vertex_fetches = false;
-               if ((ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) &&
-                   vtx_info->chan_format != data_format &&
-                   ((attrib_offset % vtx_info->element_size) ||
-                    (attrib_stride % vtx_info->element_size)))
-                       unaligned_vertex_fetches = true;
-
-               if (unaligned_vertex_fetches) {
+               if (ctx->ac.chip_class == GFX6 ||
+                   ctx->ac.chip_class >= GFX10) {
                         unsigned chan_format = vtx_info->chan_format;
                         LLVMValueRef values[4];
author	Samuel Pitoiset <samuel.pitoiset@gmail.com>
	Tue, 24 Nov 2020 13:56:55 +0000 (14:56 +0100)
committer	Marge Bot <eric+marge@anholt.net>
	Tue, 1 Dec 2020 10:14:27 +0000 (10:14 +0000)
src/amd/compiler/aco_instruction_selection.cpp		patch \| blob \| history
src/amd/vulkan/radv_nir_to_llvm.c		patch \| blob \| history