}
bool
-ac_is_thread_trace_complete(struct radeon_info *rad_info,
+ac_is_thread_trace_complete(const struct radeon_info *rad_info,
const struct ac_thread_trace_data *data,
const struct ac_thread_trace_info *info)
{
/* No active CU on the SE means it is disabled. */
return info->cu_mask[se][0] == 0;
}
+/**
+ * Gather the per-shader-engine thread trace results found in the buffer at
+ * data->ptr into *thread_trace.
+ *
+ * *thread_trace is zeroed first. For every shader engine below info->max_se
+ * that is not reported as disabled, one entry is appended to
+ * thread_trace->traces[]: the info block is copied by value, while data_ptr
+ * keeps pointing into the data->ptr buffer (nothing is copied out of it).
+ *
+ * \param data          thread trace state; data->ptr is the base address the
+ *                      per-SE info/data offsets are applied to.
+ * \param info          GPU description (max_se, cu_mask, gfx_level are read).
+ * \param thread_trace  output; fully overwritten.
+ *
+ * \return true when every enabled SE passed ac_is_thread_trace_complete().
+ *         false as soon as one SE fails that check; in that case
+ *         *thread_trace holds only the entries appended so far and
+ *         thread_trace->data is left NULL from the initial memset.
+ *
+ * NOTE(review): traces[] is indexed without a bounds check; this assumes the
+ * array can hold at least info->max_se entries -- confirm against the
+ * struct ac_thread_trace definition.
+ */
+bool
+ac_sqtt_get_trace(struct ac_thread_trace_data *data,
+ const struct radeon_info *info,
+ struct ac_thread_trace *thread_trace)
+{
+ unsigned max_se = info->max_se;
+ void *ptr = data->ptr; /* base address for the per-SE info and data offsets */
+
+ memset(thread_trace, 0, sizeof(*thread_trace)); /* also resets num_traces to 0 */
+
+ for (unsigned se = 0; se < max_se; se++) {
+ uint64_t info_offset = ac_thread_trace_get_info_offset(se);
+ uint64_t data_offset = ac_thread_trace_get_data_offset(info, data, se);
+ void *info_ptr = (uint8_t *)ptr + info_offset;
+ void *data_ptr = (uint8_t *)ptr + data_offset;
+ struct ac_thread_trace_info *trace_info = (struct ac_thread_trace_info *)info_ptr;
+ struct ac_thread_trace_se thread_trace_se = {0};
+ int first_active_cu = ffs(info->cu_mask[se][0]); /* ffs() is 1-based; 0 means no bit set */
+ /* Disabled shader engines contribute no entry. */
+ if (ac_sqtt_se_is_disabled(info, se))
+ continue;
+ /* Bail out on the first incomplete SE; the caller decides how to react. */
+ if (!ac_is_thread_trace_complete(info, data, trace_info))
+ return false;
+
+ thread_trace_se.data_ptr = data_ptr; /* aliases the buffer at data->ptr, not a copy */
+ thread_trace_se.info = *trace_info; /* info block copied by value */
+ thread_trace_se.shader_engine = se;
+
+ /* RGP seems to expect units of WGP on GFX10+. */
+ thread_trace_se.compute_unit =
+ info->gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
+
+ thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
+ thread_trace->num_traces++;
+ }
+
+ thread_trace->data = data; /* only set on the success path */
+
+ return true;
+}
ac_thread_trace_finish(struct ac_thread_trace_data *data);
bool
-ac_is_thread_trace_complete(struct radeon_info *rad_info,
+ac_is_thread_trace_complete(const struct radeon_info *rad_info,
const struct ac_thread_trace_data *data,
const struct ac_thread_trace_info *info);
bool ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se);
+bool ac_sqtt_get_trace(struct ac_thread_trace_data *data,
+ const struct radeon_info *info,
+ struct ac_thread_trace *thread_trace);
+
#endif
{
struct radv_device *device = queue->device;
struct radeon_info *rad_info = &device->physical_device->rad_info;
- unsigned max_se = rad_info->max_se;
- void *thread_trace_ptr = device->thread_trace.ptr;
-
- memset(thread_trace, 0, sizeof(*thread_trace));
-
- for (unsigned se = 0; se < max_se; se++) {
- uint64_t info_offset = ac_thread_trace_get_info_offset(se);
- uint64_t data_offset = ac_thread_trace_get_data_offset(rad_info, &device->thread_trace, se);
- void *info_ptr = (uint8_t *)thread_trace_ptr + info_offset;
- void *data_ptr = (uint8_t *)thread_trace_ptr + data_offset;
- struct ac_thread_trace_info *info = (struct ac_thread_trace_info *)info_ptr;
- struct ac_thread_trace_se thread_trace_se = {0};
- int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
-
- if (ac_sqtt_se_is_disabled(rad_info, se))
- continue;
- if (!ac_is_thread_trace_complete(&device->physical_device->rad_info, &device->thread_trace,
- info)) {
- if (!radv_thread_trace_resize_bo(device)) {
- fprintf(stderr, "Failed to resize the thread "
- "trace buffer.\n");
- abort();
- }
- return false;
+ if (!ac_sqtt_get_trace(&device->thread_trace, rad_info, thread_trace)) {
+ if (!radv_thread_trace_resize_bo(device)) {
+ fprintf(stderr, "Failed to resize the thread trace buffer.\n");
+ abort();
}
- thread_trace_se.data_ptr = data_ptr;
- thread_trace_se.info = *info;
- thread_trace_se.shader_engine = se;
-
- /* RGP seems to expect units of WGP on GFX10+. */
- thread_trace_se.compute_unit = device->physical_device->rad_info.gfx_level >= GFX10
- ? (first_active_cu / 2)
- : first_active_cu;
-
- thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
- thread_trace->num_traces++;
+ return false;
}
- thread_trace->data = &device->thread_trace;
return true;
}
if (!sctx->thread_trace->ptr)
return false;
- void *thread_trace_ptr = sctx->thread_trace->ptr;
+ if (!ac_sqtt_get_trace(sctx->thread_trace, &sctx->screen->info,
+ thread_trace)) {
+ void *thread_trace_ptr = sctx->thread_trace->ptr;
- for (unsigned se = 0; se < max_se; se++) {
- uint64_t info_offset = ac_thread_trace_get_info_offset(se);
- uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se);
- void *info_ptr = thread_trace_ptr + info_offset;
- void *data_ptr = thread_trace_ptr + data_offset;
- struct ac_thread_trace_info *info =
- (struct ac_thread_trace_info *)info_ptr;
-
- struct ac_thread_trace_se thread_trace_se = {0};
+ for (unsigned se = 0; se < max_se; se++) {
+ uint64_t info_offset = ac_thread_trace_get_info_offset(se);
+ void *info_ptr = thread_trace_ptr + info_offset;
+ struct ac_thread_trace_info *info =
+ (struct ac_thread_trace_info *)info_ptr;
- if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
- continue;
+ if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
+ continue;
- if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
- uint32_t expected_size =
- ac_get_expected_buffer_size(&sctx->screen->info, info);
- uint32_t available_size = (info->cur_offset * 32) / 1024;
-
- fprintf(stderr, "Failed to get the thread trace "
- "because the buffer is too small. The "
- "hardware needs %d KB but the "
- "buffer size is %d KB.\n",
- expected_size, available_size);
- fprintf(stderr, "Please update the buffer size with "
- "AMD_THREAD_TRACE_BUFFER_SIZE=<size_in_kbytes>\n");
- return false;
+ if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
+ uint32_t expected_size =
+ ac_get_expected_buffer_size(&sctx->screen->info, info);
+ uint32_t available_size = (info->cur_offset * 32) / 1024;
+
+ fprintf(stderr, "Failed to get the thread trace "
+ "because the buffer is too small. The "
+ "hardware needs %d KB but the "
+ "buffer size is %d KB.\n",
+ expected_size, available_size);
+ fprintf(stderr, "Please update the buffer size with "
+ "AMD_THREAD_TRACE_BUFFER_SIZE=<size_in_kbytes>\n");
+ return false;
+ }
}
-
- thread_trace_se.data_ptr = data_ptr;
- thread_trace_se.info = *info;
- thread_trace_se.shader_engine = se;
-
- int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
-
- /* For GFX10+ compute_unit really means WGP */
- thread_trace_se.compute_unit =
- sctx->screen->info.gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
-
- thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
- thread_trace->num_traces++;
}
- thread_trace->data = sctx->thread_trace;
return true;
}