radv: add a missing async compute workaround for Tonga/Iceland

author Samuel Pitoiset <samuel.pitoiset@gmail.com>

Wed, 15 Nov 2023 14:06:12 +0000 (15:06 +0100)

committer Eric Engestrom <eric@engestrom.ch>

Sat, 18 Nov 2023 21:15:13 +0000 (21:15 +0000)
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 15 Nov 2023 14:06:12 +0000 (15:06 +0100)
committer Eric Engestrom <eric@engestrom.ch>
Sat, 18 Nov 2023 21:15:13 +0000 (21:15 +0000)
diff --git a/.pick_status.json b/.pick_status.json

index 8d34aa8d6b6a9cb0e4b6bac44c8d5f53e0d4f552..7f8dfa235b01d6b76a4eee48635f67fdd71a1b19 100644 (file)
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1344,7 +1344,7 @@
          "description": "radv: add a missing async compute workaround for Tonga/Iceland",
          "nominated": true,
          "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
          "main_sha": null,
          "because_sha": null,
          "notes": null
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c

index 16634762cbcdb40ddb77f159d33b255d6f54f99c..e289d741bc6a90db063aa0ea758e9b5bf965b92a 100644 (file)
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -1246,6 +1246,13 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
                                      info->family == CHIP_BONAIRE ||
                                      info->family == CHIP_KABINI;
  
+   /* HW bug workaround for async compute dispatches when threadgroup > 4096.
+    * The workaround is to switch the dispatch from "threadgroup" dimension
+    * mode to "thread" dimension mode.
+    */
+   info->has_async_compute_threadgroup_bug = info->family == CHIP_ICELAND ||
+                                             info->family == CHIP_TONGA;
+
     /* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the
      * feature version wasn't bumped.
      */
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h

index c9d66f7aaba7fead75abcd6e23d4e47c32abd1c7..9cd24a10d01e65abda5ebc5e5f12fc01a266eaf9 100644 (file)
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -101,6 +101,7 @@ struct radeon_info {
     bool has_two_planes_iterate256_bug;
     bool has_vgt_flush_ngg_legacy_bug;
     bool has_cs_regalloc_hang_bug;
+   bool has_async_compute_threadgroup_bug;
     bool has_32bit_predication;
     bool has_3d_cube_border_color_mipmap;
     bool has_image_opcodes;
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c

index 57b720520a859427119ba164a4612ca9f79cff27..fc8736bcf6dafa23fb18b252857a071934d9dccc 100644 (file)
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -9710,11 +9710,11 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
           radeon_emit(cs, dispatch_initiator);
        }
     } else {
+      const unsigned *cs_block_size = compute_shader->info.cs.block_size;
        unsigned blocks[3] = {info->blocks[0], info->blocks[1], info->blocks[2]};
        unsigned offsets[3] = {info->offsets[0], info->offsets[1], info->offsets[2]};
  
        if (info->unaligned) {
-         const unsigned *cs_block_size = compute_shader->info.cs.block_size;
           unsigned remainder[3];
  
           /* If aligned, these should be an entire block size,
@@ -9779,6 +9779,21 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
           predicating = false;
        }
  
+      if (cmd_buffer->device->physical_device->rad_info.has_async_compute_threadgroup_bug &&
+          cmd_buffer->qf == RADV_QUEUE_COMPUTE) {
+         for (unsigned i = 0; i < 3; i++) {
+            if (info->unaligned) {
+               /* info->blocks is already in thread dimensions for unaligned dispatches. */
+               blocks[i] = info->blocks[i];
+            } else {
+               /* Force the async compute dispatch to be in "thread" dim mode to work around a HW bug. */
+               blocks[i] *= cs_block_size[i];
+            }
+
+            dispatch_initiator |= S_00B800_USE_THREAD_DIMENSIONS(1);
+         }
+      }
+
        radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1));
        radeon_emit(cs, blocks[0]);
        radeon_emit(cs, blocks[1]);
author	Samuel Pitoiset <samuel.pitoiset@gmail.com>
	Wed, 15 Nov 2023 14:06:12 +0000 (15:06 +0100)
committer	Eric Engestrom <eric@engestrom.ch>
	Sat, 18 Nov 2023 21:15:13 +0000 (21:15 +0000)
.pick_status.json		patch \| blob \| history
src/amd/common/ac_gpu_info.c		patch \| blob \| history
src/amd/common/ac_gpu_info.h		patch \| blob \| history
src/amd/vulkan/radv_cmd_buffer.c		patch \| blob \| history