ac/sqtt: add ac_sqtt_get_trace() helper
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 26 Apr 2023 12:26:07 +0000 (14:26 +0200)
committer Marge Bot <emma+marge@anholt.net>
Fri, 28 Apr 2023 16:55:12 +0000 (16:55 +0000)
It can be shared between RADV and RadeonSI. The only difference is
that RadeonSI can't auto-resize the SQTT BO.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22732>

src/amd/common/ac_sqtt.c
src/amd/common/ac_sqtt.h
src/amd/vulkan/radv_sqtt.c
src/gallium/drivers/radeonsi/si_sqtt.c

index 5e596fe..7570eb0 100644 (file)
@@ -107,7 +107,7 @@ ac_thread_trace_finish(struct ac_thread_trace_data *data)
 }
 
 bool
-ac_is_thread_trace_complete(struct radeon_info *rad_info,
+ac_is_thread_trace_complete(const struct radeon_info *rad_info,
                             const struct ac_thread_trace_data *data,
                             const struct ac_thread_trace_info *info)
 {
@@ -262,3 +262,45 @@ ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se)
    /* No active CU on the SE means it is disabled. */
    return info->cu_mask[se][0] == 0;
 }
+/* Gather one SQTT trace per enabled SE from data->ptr; false if an enabled SE's trace is incomplete. */
+bool
+ac_sqtt_get_trace(struct ac_thread_trace_data *data,
+                  const struct radeon_info *info,
+                  struct ac_thread_trace *thread_trace)
+{
+   unsigned max_se = info->max_se;
+   void *ptr = data->ptr; /* Base address the per-SE info/data offsets below are added to. */
+
+   memset(thread_trace, 0, sizeof(*thread_trace)); /* Start from an empty result (num_traces == 0). */
+
+   for (unsigned se = 0; se < max_se; se++) {
+      uint64_t info_offset = ac_thread_trace_get_info_offset(se);
+      uint64_t data_offset = ac_thread_trace_get_data_offset(info, data, se);
+      void *info_ptr = (uint8_t *)ptr + info_offset;
+      void *data_ptr = (uint8_t *)ptr + data_offset;
+      struct ac_thread_trace_info *trace_info = (struct ac_thread_trace_info *)info_ptr;
+      struct ac_thread_trace_se thread_trace_se = {0};
+      int first_active_cu = ffs(info->cu_mask[se][0]); /* ffs() is 1-based; 0 when no bit is set. */
+      /* Disabled SEs add no entry, so traces[] is packed and may hold fewer than max_se items. */
+      if (ac_sqtt_se_is_disabled(info, se))
+         continue;
+      /* Stop at the first incomplete SE; earlier entries stay in thread_trace, ->data is not set. */
+      if (!ac_is_thread_trace_complete(info, data, trace_info))
+         return false;
+
+      thread_trace_se.data_ptr = data_ptr; /* Points into the buffer at data->ptr; nothing is copied. */
+      thread_trace_se.info = *trace_info; /* The info struct, in contrast, is copied by value. */
+      thread_trace_se.shader_engine = se;
+
+      /* RGP seems to expect units of WGP on GFX10+. */
+      thread_trace_se.compute_unit =
+         info->gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
+
+      thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
+      thread_trace->num_traces++;
+   }
+   /* Reached only when every enabled SE was complete: link the result back to its SQTT state. */
+   thread_trace->data = data;
+
+   return true;
+}
index a3b0208..505f2ba 100644 (file)
@@ -106,7 +106,7 @@ void
 ac_thread_trace_finish(struct ac_thread_trace_data *data);
 
 bool
-ac_is_thread_trace_complete(struct radeon_info *rad_info,
+ac_is_thread_trace_complete(const struct radeon_info *rad_info,
                             const struct ac_thread_trace_data *data,
                             const struct ac_thread_trace_info *info);
 
@@ -562,4 +562,8 @@ union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_thread_trace_da
 
 bool ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se);
 
+bool ac_sqtt_get_trace(struct ac_thread_trace_data *data,
+                       const struct radeon_info *info,
+                       struct ac_thread_trace *thread_trace);
+
 #endif
index 0f5d413..03eaa3b 100644 (file)
@@ -810,47 +810,16 @@ radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_t
 {
    struct radv_device *device = queue->device;
    struct radeon_info *rad_info = &device->physical_device->rad_info;
-   unsigned max_se = rad_info->max_se;
-   void *thread_trace_ptr = device->thread_trace.ptr;
-
-   memset(thread_trace, 0, sizeof(*thread_trace));
-
-   for (unsigned se = 0; se < max_se; se++) {
-      uint64_t info_offset = ac_thread_trace_get_info_offset(se);
-      uint64_t data_offset = ac_thread_trace_get_data_offset(rad_info, &device->thread_trace, se);
-      void *info_ptr = (uint8_t *)thread_trace_ptr + info_offset;
-      void *data_ptr = (uint8_t *)thread_trace_ptr + data_offset;
-      struct ac_thread_trace_info *info = (struct ac_thread_trace_info *)info_ptr;
-      struct ac_thread_trace_se thread_trace_se = {0};
-      int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
-
-      if (ac_sqtt_se_is_disabled(rad_info, se))
-         continue;
 
-      if (!ac_is_thread_trace_complete(&device->physical_device->rad_info, &device->thread_trace,
-                                       info)) {
-         if (!radv_thread_trace_resize_bo(device)) {
-            fprintf(stderr, "Failed to resize the thread "
-                            "trace buffer.\n");
-            abort();
-         }
-         return false;
+   if (!ac_sqtt_get_trace(&device->thread_trace, rad_info, thread_trace)) {
+      if (!radv_thread_trace_resize_bo(device)) {
+         fprintf(stderr, "Failed to resize the thread trace buffer.\n");
+         abort();
       }
 
-      thread_trace_se.data_ptr = data_ptr;
-      thread_trace_se.info = *info;
-      thread_trace_se.shader_engine = se;
-
-      /* RGP seems to expect units of WGP on GFX10+. */
-      thread_trace_se.compute_unit = device->physical_device->rad_info.gfx_level >= GFX10
-                                        ? (first_active_cu / 2)
-                                        : first_active_cu;
-
-      thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
-      thread_trace->num_traces++;
+      return false;
    }
 
-   thread_trace->data = &device->thread_trace;
    return true;
 }
 
index 5d2438e..d9c91c1 100644 (file)
@@ -616,51 +616,36 @@ si_get_thread_trace(struct si_context *sctx,
    if (!sctx->thread_trace->ptr)
       return false;
 
-   void *thread_trace_ptr = sctx->thread_trace->ptr;
+   if (!ac_sqtt_get_trace(sctx->thread_trace, &sctx->screen->info,
+                          thread_trace)) {
+      void *thread_trace_ptr = sctx->thread_trace->ptr;
 
-   for (unsigned se = 0; se < max_se; se++) {
-      uint64_t info_offset = ac_thread_trace_get_info_offset(se);
-      uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se);
-      void *info_ptr = thread_trace_ptr + info_offset;
-      void *data_ptr = thread_trace_ptr + data_offset;
-      struct ac_thread_trace_info *info =
-         (struct ac_thread_trace_info *)info_ptr;
-
-      struct ac_thread_trace_se thread_trace_se = {0};
+      for (unsigned se = 0; se < max_se; se++) {
+         uint64_t info_offset = ac_thread_trace_get_info_offset(se);
+         void *info_ptr = thread_trace_ptr + info_offset;
+         struct ac_thread_trace_info *info =
+            (struct ac_thread_trace_info *)info_ptr;
 
-      if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
-         continue;
+         if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
+            continue;
 
-      if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
-         uint32_t expected_size =
-            ac_get_expected_buffer_size(&sctx->screen->info, info);
-         uint32_t available_size = (info->cur_offset * 32) / 1024;
-
-         fprintf(stderr, "Failed to get the thread trace "
-                 "because the buffer is too small. The "
-                 "hardware needs %d KB but the "
-                 "buffer size is %d KB.\n",
-                 expected_size, available_size);
-         fprintf(stderr, "Please update the buffer size with "
-                 "AMD_THREAD_TRACE_BUFFER_SIZE=<size_in_kbytes>\n");
-         return false;
+         if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
+            uint32_t expected_size =
+               ac_get_expected_buffer_size(&sctx->screen->info, info);
+            uint32_t available_size = (info->cur_offset * 32) / 1024;
+
+            fprintf(stderr, "Failed to get the thread trace "
+                    "because the buffer is too small. The "
+                    "hardware needs %d KB but the "
+                    "buffer size is %d KB.\n",
+                    expected_size, available_size);
+            fprintf(stderr, "Please update the buffer size with "
+                    "AMD_THREAD_TRACE_BUFFER_SIZE=<size_in_kbytes>\n");
+            return false;
+         }
       }
-
-      thread_trace_se.data_ptr = data_ptr;
-      thread_trace_se.info = *info;
-      thread_trace_se.shader_engine = se;
-
-      int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
-
-      /* For GFX10+ compute_unit really means WGP */
-      thread_trace_se.compute_unit =
-         sctx->screen->info.gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
-
-      thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
-      thread_trace->num_traces++;
    }
 
-   thread_trace->data = sctx->thread_trace;
    return true;
 }