SQTT stands for SQ Thread Trace; the acronym is used because it's shorter.
Note that environment variables aren't renamed because this might
break external applications.
This renames:
- ac_thread_trace_data to ac_sqtt (this is the main struct)
- ac_thread_trace_info to ac_sqtt_data_info
- ac_thread_trace_se to ac_sqtt_data_se
- ac_thread_trace to ac_sqtt_trace (this contains the trace data only)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22732>
}
#if defined(USE_LIBELF)
-static void ac_sqtt_dump_data(struct radeon_info *rad_info,
- struct ac_thread_trace *thread_trace,
- const struct ac_spm_trace *spm_trace,
- FILE *output)
+static void
+ac_sqtt_dump_data(struct radeon_info *rad_info, struct ac_sqtt_trace *sqtt_trace,
+ const struct ac_spm_trace *spm_trace, FILE *output)
{
struct sqtt_file_chunk_asic_info asic_info = {0};
struct sqtt_file_chunk_cpu_info cpu_info = {0};
struct sqtt_file_chunk_api_info api_info = {0};
struct sqtt_file_header header = {0};
size_t file_offset = 0;
- const struct rgp_code_object *rgp_code_object = thread_trace->rgp_code_object;
- const struct rgp_loader_events *rgp_loader_events = thread_trace->rgp_loader_events;
- const struct rgp_pso_correlation *rgp_pso_correlation = thread_trace->rgp_pso_correlation;
- const struct rgp_queue_info *rgp_queue_info = thread_trace->rgp_queue_info;
- const struct rgp_queue_event *rgp_queue_event = thread_trace->rgp_queue_event;
- const struct rgp_clock_calibration *rgp_clock_calibration = thread_trace->rgp_clock_calibration;
+ const struct rgp_code_object *rgp_code_object = sqtt_trace->rgp_code_object;
+ const struct rgp_loader_events *rgp_loader_events = sqtt_trace->rgp_loader_events;
+ const struct rgp_pso_correlation *rgp_pso_correlation = sqtt_trace->rgp_pso_correlation;
+ const struct rgp_queue_info *rgp_queue_info = sqtt_trace->rgp_queue_info;
+ const struct rgp_queue_event *rgp_queue_event = sqtt_trace->rgp_queue_event;
+ const struct rgp_clock_calibration *rgp_clock_calibration = sqtt_trace->rgp_clock_calibration;
/* SQTT header file. */
ac_sqtt_fill_header(&header);
}
}
- if (thread_trace) {
- for (unsigned i = 0; i < thread_trace->num_traces; i++) {
- const struct ac_thread_trace_se *se = &thread_trace->traces[i];
- const struct ac_thread_trace_info *info = &se->info;
+ if (sqtt_trace) {
+ for (unsigned i = 0; i < sqtt_trace->num_traces; i++) {
+ const struct ac_sqtt_data_se *se = &sqtt_trace->traces[i];
+ const struct ac_sqtt_data_info *info = &se->info;
struct sqtt_file_chunk_sqtt_desc desc = {0};
struct sqtt_file_chunk_sqtt_data data = {0};
uint64_t size = info->cur_offset * 32; /* unit of 32 bytes */
}
#endif
-int ac_dump_rgp_capture(struct radeon_info *info,
- struct ac_thread_trace *thread_trace,
- const struct ac_spm_trace *spm_trace)
+int
+ac_dump_rgp_capture(struct radeon_info *info, struct ac_sqtt_trace *sqtt_trace,
+ const struct ac_spm_trace *spm_trace)
{
#if !defined(USE_LIBELF)
return -1;
if (!f)
return -1;
- ac_sqtt_dump_data(info, thread_trace, spm_trace, f);
+ ac_sqtt_dump_data(info, sqtt_trace, spm_trace, f);
fprintf(stderr, "RGP capture saved to '%s'\n", filename);
#include "util/simple_mtx.h"
struct radeon_info;
-struct ac_thread_trace;
-struct ac_thread_trace_data;
+struct ac_sqtt_trace;
+struct ac_sqtt;
struct ac_spm_trace;
enum rgp_hardware_stages {
simple_mtx_t lock;
};
-int
-ac_dump_rgp_capture(struct radeon_info *info,
- struct ac_thread_trace *thread_trace,
- const struct ac_spm_trace *spm_trace);
+int ac_dump_rgp_capture(struct radeon_info *info, struct ac_sqtt_trace *sqtt_trace,
+ const struct ac_spm_trace *spm_trace);
void
ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start,
#include "util/os_time.h"
uint64_t
-ac_thread_trace_get_info_offset(unsigned se)
+ac_sqtt_get_info_offset(unsigned se)
{
- return sizeof(struct ac_thread_trace_info) * se;
+ return sizeof(struct ac_sqtt_data_info) * se;
}
uint64_t
-ac_thread_trace_get_data_offset(const struct radeon_info *rad_info,
- const struct ac_thread_trace_data *data, unsigned se)
+ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *data, unsigned se)
{
unsigned max_se = rad_info->max_se;
uint64_t data_offset;
- data_offset = align64(sizeof(struct ac_thread_trace_info) * max_se,
- 1 << SQTT_BUFFER_ALIGN_SHIFT);
+ data_offset = align64(sizeof(struct ac_sqtt_data_info) * max_se, 1 << SQTT_BUFFER_ALIGN_SHIFT);
data_offset += data->buffer_size * se;
return data_offset;
}
uint64_t
-ac_thread_trace_get_info_va(uint64_t va, unsigned se)
+ac_sqtt_get_info_va(uint64_t va, unsigned se)
{
- return va + ac_thread_trace_get_info_offset(se);
+ return va + ac_sqtt_get_info_offset(se);
}
uint64_t
-ac_thread_trace_get_data_va(const struct radeon_info *rad_info,
- const struct ac_thread_trace_data *data, uint64_t va, unsigned se)
+ac_sqtt_get_data_va(const struct radeon_info *rad_info, const struct ac_sqtt *data, uint64_t va,
+ unsigned se)
{
- return va + ac_thread_trace_get_data_offset(rad_info, data, se);
+ return va + ac_sqtt_get_data_offset(rad_info, data, se);
}
void
-ac_thread_trace_init(struct ac_thread_trace_data *data)
+ac_sqtt_init(struct ac_sqtt *data)
{
list_inithead(&data->rgp_pso_correlation.record);
simple_mtx_init(&data->rgp_pso_correlation.lock, mtx_plain);
}
void
-ac_thread_trace_finish(struct ac_thread_trace_data *data)
+ac_sqtt_finish(struct ac_sqtt *data)
{
assert(data->rgp_pso_correlation.record_count == 0);
simple_mtx_destroy(&data->rgp_pso_correlation.lock);
}
bool
-ac_is_thread_trace_complete(const struct radeon_info *rad_info,
- const struct ac_thread_trace_data *data,
- const struct ac_thread_trace_info *info)
+ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *data,
+ const struct ac_sqtt_data_info *info)
{
if (rad_info->gfx_level >= GFX10) {
/* GFX10 doesn't have THREAD_TRACE_CNTR but it reports the number of
}
uint32_t
-ac_get_expected_buffer_size(struct radeon_info *rad_info,
- const struct ac_thread_trace_info *info)
+ac_get_expected_buffer_size(struct radeon_info *rad_info, const struct ac_sqtt_data_info *info)
{
if (rad_info->gfx_level >= GFX10) {
uint32_t dropped_cntr_per_se = info->gfx10_dropped_cntr / rad_info->max_se;
}
bool
-ac_sqtt_add_pso_correlation(struct ac_thread_trace_data *thread_trace_data,
- uint64_t pipeline_hash)
+ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash)
{
- struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;
+ struct rgp_pso_correlation *pso_correlation = &sqtt->rgp_pso_correlation;
struct rgp_pso_correlation_record *record;
record = malloc(sizeof(struct rgp_pso_correlation_record));
}
bool
-ac_sqtt_add_code_object_loader_event(struct ac_thread_trace_data *thread_trace_data,
- uint64_t pipeline_hash,
+ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash,
uint64_t base_address)
{
- struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;
+ struct rgp_loader_events *loader_events = &sqtt->rgp_loader_events;
struct rgp_loader_events_record *record;
record = malloc(sizeof(struct rgp_loader_events_record));
}
bool
-ac_sqtt_add_clock_calibration(struct ac_thread_trace_data *thread_trace_data,
- uint64_t cpu_timestamp, uint64_t gpu_timestamp)
+ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp, uint64_t gpu_timestamp)
{
- struct rgp_clock_calibration *clock_calibration = &thread_trace_data->rgp_clock_calibration;
+ struct rgp_clock_calibration *clock_calibration = &sqtt->rgp_clock_calibration;
struct rgp_clock_calibration_record *record;
record = malloc(sizeof(struct rgp_clock_calibration_record));
}
union rgp_sqtt_marker_cb_id
-ac_sqtt_get_next_cmdbuf_id(struct ac_thread_trace_data *data,
- enum amd_ip_type ip_type)
+ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *data, enum amd_ip_type ip_type)
{
union rgp_sqtt_marker_cb_id cb_id = {0};
}
bool
-ac_sqtt_get_trace(struct ac_thread_trace_data *data,
- const struct radeon_info *info,
- struct ac_thread_trace *thread_trace)
+ac_sqtt_get_trace(struct ac_sqtt *data, const struct radeon_info *info,
+ struct ac_sqtt_trace *sqtt_trace)
{
unsigned max_se = info->max_se;
void *ptr = data->ptr;
- memset(thread_trace, 0, sizeof(*thread_trace));
+ memset(sqtt_trace, 0, sizeof(*sqtt_trace));
for (unsigned se = 0; se < max_se; se++) {
- uint64_t info_offset = ac_thread_trace_get_info_offset(se);
- uint64_t data_offset = ac_thread_trace_get_data_offset(info, data, se);
+ uint64_t info_offset = ac_sqtt_get_info_offset(se);
+ uint64_t data_offset = ac_sqtt_get_data_offset(info, data, se);
void *info_ptr = (uint8_t *)ptr + info_offset;
void *data_ptr = (uint8_t *)ptr + data_offset;
- struct ac_thread_trace_info *trace_info = (struct ac_thread_trace_info *)info_ptr;
- struct ac_thread_trace_se thread_trace_se = {0};
+ struct ac_sqtt_data_info *trace_info = (struct ac_sqtt_data_info *)info_ptr;
+ struct ac_sqtt_data_se data_se = {0};
int first_active_cu = ffs(info->cu_mask[se][0]);
if (ac_sqtt_se_is_disabled(info, se))
continue;
- if (!ac_is_thread_trace_complete(info, data, trace_info))
+ if (!ac_is_sqtt_complete(info, data, trace_info))
return false;
- thread_trace_se.data_ptr = data_ptr;
- thread_trace_se.info = *trace_info;
- thread_trace_se.shader_engine = se;
+ data_se.data_ptr = data_ptr;
+ data_se.info = *trace_info;
+ data_se.shader_engine = se;
/* RGP seems to expect units of WGP on GFX10+. */
- thread_trace_se.compute_unit =
- info->gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
+ data_se.compute_unit = info->gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
- thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
- thread_trace->num_traces++;
+ sqtt_trace->traces[sqtt_trace->num_traces] = data_se;
+ sqtt_trace->num_traces++;
}
- thread_trace->rgp_code_object = &data->rgp_code_object;
- thread_trace->rgp_loader_events = &data->rgp_loader_events;
- thread_trace->rgp_pso_correlation = &data->rgp_pso_correlation;
- thread_trace->rgp_queue_info = &data->rgp_queue_info;
- thread_trace->rgp_queue_event = &data->rgp_queue_event;
- thread_trace->rgp_clock_calibration = &data->rgp_clock_calibration;
+ sqtt_trace->rgp_code_object = &data->rgp_code_object;
+ sqtt_trace->rgp_loader_events = &data->rgp_loader_events;
+ sqtt_trace->rgp_pso_correlation = &data->rgp_pso_correlation;
+ sqtt_trace->rgp_queue_info = &data->rgp_queue_info;
+ sqtt_trace->rgp_queue_event = &data->rgp_queue_event;
+ sqtt_trace->rgp_clock_calibration = &data->rgp_clock_calibration;
return true;
}
struct radeon_cmdbuf;
struct radeon_info;
-struct ac_thread_trace_data {
+/**
+ * SQ Thread tracing is a tracing mechanism that allows taking a detailed look
+ * at what the shader cores are doing.
+ *
+ * Among the things recorded are:
+ * - draws/dispatches + state
+ * - when each wave starts and stops.
+ * - for one SIMD per SE all instructions executed on that SIMD.
+ *
+ * The hardware stores all these as events in a buffer, no manual barrier
+ * around each command needed. The primary user of this is RGP.
+ */
+struct ac_sqtt {
struct radeon_cmdbuf *start_cs[2];
struct radeon_cmdbuf *stop_cs[2];
/* struct radeon_winsys_bo or struct pb_buffer */
#define SQTT_BUFFER_ALIGN_SHIFT 12
-struct ac_thread_trace_info {
+struct ac_sqtt_data_info {
uint32_t cur_offset;
uint32_t trace_status;
union {
};
};
-struct ac_thread_trace_se {
- struct ac_thread_trace_info info;
+struct ac_sqtt_data_se {
+ struct ac_sqtt_data_info info;
void *data_ptr;
uint32_t shader_engine;
uint32_t compute_unit;
#define SQTT_MAX_TRACES 6
-struct ac_thread_trace {
+struct ac_sqtt_trace {
const struct rgp_code_object *rgp_code_object;
const struct rgp_loader_events *rgp_loader_events;
const struct rgp_pso_correlation *rgp_pso_correlation;
const struct rgp_clock_calibration *rgp_clock_calibration;
uint32_t num_traces;
- struct ac_thread_trace_se traces[SQTT_MAX_TRACES];
+ struct ac_sqtt_data_se traces[SQTT_MAX_TRACES];
};
-uint64_t
-ac_thread_trace_get_info_offset(unsigned se);
+uint64_t ac_sqtt_get_info_offset(unsigned se);
-uint64_t
-ac_thread_trace_get_data_offset(const struct radeon_info *rad_info,
- const struct ac_thread_trace_data *data, unsigned se);
-uint64_t
-ac_thread_trace_get_info_va(uint64_t va, unsigned se);
+uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
+ unsigned se);
+uint64_t ac_sqtt_get_info_va(uint64_t va, unsigned se);
-uint64_t
-ac_thread_trace_get_data_va(const struct radeon_info *rad_info,
- const struct ac_thread_trace_data *data, uint64_t va, unsigned se);
+uint64_t ac_sqtt_get_data_va(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
+ uint64_t va, unsigned se);
-void
-ac_thread_trace_init(struct ac_thread_trace_data *data);
+void ac_sqtt_init(struct ac_sqtt *data);
-void
-ac_thread_trace_finish(struct ac_thread_trace_data *data);
+void ac_sqtt_finish(struct ac_sqtt *data);
-bool
-ac_is_thread_trace_complete(const struct radeon_info *rad_info,
- const struct ac_thread_trace_data *data,
- const struct ac_thread_trace_info *info);
+bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt,
+ const struct ac_sqtt_data_info *info);
-uint32_t
-ac_get_expected_buffer_size(struct radeon_info *rad_info,
- const struct ac_thread_trace_info *info);
+uint32_t ac_get_expected_buffer_size(struct radeon_info *rad_info,
+ const struct ac_sqtt_data_info *info);
/**
* Identifiers for RGP SQ thread-tracing markers (Table 1)
static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12,
"rgp_sqtt_marker_pipeline_bind doesn't match RGP spec");
+bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash);
-bool ac_sqtt_add_pso_correlation(struct ac_thread_trace_data *thread_trace_data,
- uint64_t pipeline_hash);
-
-bool ac_sqtt_add_code_object_loader_event(struct ac_thread_trace_data *thread_trace_data,
- uint64_t pipeline_hash,
+bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash,
uint64_t base_address);
-bool ac_sqtt_add_clock_calibration(struct ac_thread_trace_data *thread_trace_data,
- uint64_t cpu_timestamp,
+bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp,
uint64_t gpu_timestamp);
bool ac_check_profile_state(const struct radeon_info *info);
-union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_thread_trace_data *data,
+union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt,
enum amd_ip_type ip_type);
bool ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se);
-bool ac_sqtt_get_trace(struct ac_thread_trace_data *data,
- const struct radeon_info *info,
- struct ac_thread_trace *thread_trace);
+bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info,
+ struct ac_sqtt_trace *sqtt_trace);
#endif
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
marker.api_type = api_type;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}
static void
marker.api_type = api_type;
marker.is_end = 1;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}
static void
marker.instance_offset_reg_idx = instance_offset_user_data;
marker.draw_index_reg_idx = draw_index_user_data;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}
static void
marker.thread_y = y;
marker.thread_z = z;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}
static void
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
marker.data_type = type;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
} else {
assert(str != NULL);
unsigned len = strlen(str);
memcpy(buffer, &marker, sizeof(marker));
memcpy(buffer + sizeof(marker), str, len);
- radv_emit_thread_trace_userdata(cmd_buffer, buffer,
- sizeof(marker) / 4 + marker.length / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, buffer, sizeof(marker) / 4 + marker.length / 4);
}
}
uint64_t device_id = (uintptr_t)cmd_buffer->device;
struct rgp_sqtt_marker_cb_start marker = {0};
- if (likely(!cmd_buffer->device->thread_trace.bo))
+ if (likely(!cmd_buffer->device->sqtt.bo))
return;
/* Reserve a command buffer ID for SQTT. */
enum amd_ip_type ip_type =
radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf);
union rgp_sqtt_marker_cb_id cb_id =
- ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->thread_trace, ip_type);
+ ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type);
cmd_buffer->sqtt_cb_id = cb_id.all;
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}
void
uint64_t device_id = (uintptr_t)cmd_buffer->device;
struct rgp_sqtt_marker_cb_end marker = {0};
- if (likely(!cmd_buffer->device->thread_trace.bo))
+ if (likely(!cmd_buffer->device->sqtt.bo))
return;
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
marker.device_id_low = device_id;
marker.device_id_high = device_id >> 32;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}
void
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
{
- if (likely(!cmd_buffer->device->thread_trace.bo))
+ if (likely(!cmd_buffer->device->sqtt.bo))
return;
radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX,
void
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z)
{
- if (likely(!cmd_buffer->device->thread_trace.bo))
+ if (likely(!cmd_buffer->device->sqtt.bo))
return;
radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, x, y, z);
{
struct rgp_sqtt_marker_barrier_end marker = {0};
- if (likely(!cmd_buffer->device->thread_trace.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
+ if (likely(!cmd_buffer->device->sqtt.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
return;
cmd_buffer->state.pending_sqtt_barrier_end = false;
if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
marker.inval_gl1 = true;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
cmd_buffer->state.num_layout_transitions = 0;
}
{
struct rgp_sqtt_marker_barrier_start marker = {0};
- if (likely(!cmd_buffer->device->thread_trace.bo))
+ if (likely(!cmd_buffer->device->sqtt.bo))
return;
radv_describe_barrier_end_delayed(cmd_buffer);
marker.cb_id = cmd_buffer->sqtt_cb_id;
marker.dword02 = reason;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}
void
{
struct rgp_sqtt_marker_layout_transition marker = {0};
- if (likely(!cmd_buffer->device->thread_trace.bo))
+ if (likely(!cmd_buffer->device->sqtt.bo))
return;
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
cmd_buffer->state.num_layout_transitions++;
}
{
struct rgp_sqtt_marker_pipeline_bind marker = {0};
- if (likely(!cmd_buffer->device->thread_trace.bo))
+ if (likely(!cmd_buffer->device->sqtt.bo))
return;
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
marker.api_pso_hash[0] = pipeline->pipeline_hash;
marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;
- radv_emit_thread_trace_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
+ radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
}
/* TODO: Improve the way to trigger capture (overlay, etc). */
static void
-radv_handle_thread_trace(VkQueue _queue)
+radv_handle_sqtt(VkQueue _queue)
{
RADV_FROM_HANDLE(radv_queue, queue, _queue);
- static bool thread_trace_enabled = false;
+ static bool sqtt_enabled = false;
static uint64_t num_frames = 0;
bool resize_trigger = false;
- if (thread_trace_enabled) {
- struct ac_thread_trace thread_trace = {0};
+ if (sqtt_enabled) {
+ struct ac_sqtt_trace sqtt_trace = {0};
- radv_end_thread_trace(queue);
- thread_trace_enabled = false;
+ radv_end_sqtt(queue);
+ sqtt_enabled = false;
/* TODO: Do something better than this whole sync. */
queue->device->vk.dispatch_table.QueueWaitIdle(_queue);
- if (radv_get_thread_trace(queue, &thread_trace)) {
+ if (radv_get_sqtt_trace(queue, &sqtt_trace)) {
struct ac_spm_trace spm_trace;
if (queue->device->spm.bo)
ac_spm_get_trace(&queue->device->spm, &spm_trace);
- ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace,
+ ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &sqtt_trace,
queue->device->spm.bo ? &spm_trace : NULL);
} else {
/* Trigger a new capture if the driver failed to get
}
/* Clear resources used for this capture. */
- radv_reset_thread_trace(queue->device);
+ radv_reset_sqtt_trace(queue->device);
}
- if (!thread_trace_enabled) {
- bool frame_trigger = num_frames == queue->device->thread_trace.start_frame;
+ if (!sqtt_enabled) {
+ bool frame_trigger = num_frames == queue->device->sqtt.start_frame;
bool file_trigger = false;
#ifndef _WIN32
- if (queue->device->thread_trace.trigger_file &&
- access(queue->device->thread_trace.trigger_file, W_OK) == 0) {
- if (unlink(queue->device->thread_trace.trigger_file) == 0) {
+ if (queue->device->sqtt.trigger_file && access(queue->device->sqtt.trigger_file, W_OK) == 0) {
+ if (unlink(queue->device->sqtt.trigger_file) == 0) {
file_trigger = true;
} else {
/* Do not enable tracing if we cannot remove the file,
}
/* Sample CPU/GPU clocks before starting the trace. */
- if (!radv_thread_trace_sample_clocks(queue->device)) {
+ if (!radv_sqtt_sample_clocks(queue->device)) {
fprintf(stderr, "radv: Failed to sample clocks\n");
}
- radv_begin_thread_trace(queue);
- assert(!thread_trace_enabled);
- thread_trace_enabled = true;
+ radv_begin_sqtt(queue);
+ assert(!sqtt_enabled);
+ sqtt_enabled = true;
}
}
num_frames++;
if (result != VK_SUCCESS)
return result;
- radv_handle_thread_trace(_queue);
+ radv_handle_sqtt(_queue);
return VK_SUCCESS;
}
static VkResult
radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
+ struct ac_sqtt *sqtt = &device->sqtt;
+ struct rgp_code_object *code_object = &sqtt->rgp_code_object;
struct rgp_code_object_record *record;
record = malloc(sizeof(struct rgp_code_object_record));
bool result;
uint64_t base_va = ~0;
- result = ac_sqtt_add_pso_correlation(&device->thread_trace, pipeline->pipeline_hash);
+ result = ac_sqtt_add_pso_correlation(&device->sqtt, pipeline->pipeline_hash);
if (!result)
return VK_ERROR_OUT_OF_HOST_MEMORY;
base_va = MIN2(base_va, va);
}
- result =
- ac_sqtt_add_code_object_loader_event(&device->thread_trace, pipeline->pipeline_hash, base_va);
+ result = ac_sqtt_add_code_object_loader_event(&device->sqtt, pipeline->pipeline_hash, base_va);
if (!result)
return VK_ERROR_OUT_OF_HOST_MEMORY;
static void
radv_unregister_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;
- struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;
- struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
+ struct ac_sqtt *sqtt = &device->sqtt;
+ struct rgp_pso_correlation *pso_correlation = &sqtt->rgp_pso_correlation;
+ struct rgp_loader_events *loader_events = &sqtt->rgp_loader_events;
+ struct rgp_code_object *code_object = &sqtt->rgp_code_object;
/* Destroy the PSO correlation record. */
simple_mtx_lock(&pso_correlation->lock);
radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags)
{
const struct radv_device *device = cmd_buffer->device;
- if (unlikely(device->thread_trace.bo)) {
+ if (unlikely(device->sqtt.bo)) {
radeon_check_space(device->ws, cmd_buffer->cs, 2);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
static bool
radv_spm_trace_enabled()
{
- return radv_thread_trace_enabled() &&
- debug_get_bool_option("RADV_THREAD_TRACE_CACHE_COUNTERS", false);
+ return radv_sqtt_enabled() && debug_get_bool_option("RADV_THREAD_TRACE_CACHE_COUNTERS", false);
}
VKAPI_ATTR VkResult VKAPI_CALL
add_entrypoints(&b, &rage2_device_entrypoints, RADV_APP_DISPATCH_TABLE);
}
- if (radv_thread_trace_enabled())
+ if (radv_sqtt_enabled())
add_entrypoints(&b, &sqtt_device_entrypoints, RADV_RGP_DISPATCH_TABLE);
if (radv_rra_trace_enabled() && radv_enable_rt(physical_device, false))
radv_dump_enabled_options(device, stderr);
}
- if (radv_thread_trace_enabled()) {
+ if (radv_sqtt_enabled()) {
if (device->physical_device->rad_info.gfx_level < GFX8 ||
device->physical_device->rad_info.gfx_level > GFX11) {
fprintf(stderr, "GPU hardware not supported: refer to "
abort();
}
- if (!radv_thread_trace_init(device)) {
+ if (!radv_sqtt_init(device)) {
result = VK_ERROR_INITIALIZATION_FAILED;
goto fail;
}
- fprintf(stderr, "radv: Thread trace support is enabled (initial buffer size: %u MiB, "
- "instruction timing: %s, cache counters: %s).\n",
- device->thread_trace.buffer_size / (1024 * 1024),
+ fprintf(stderr,
+ "radv: Thread trace support is enabled (initial buffer size: %u MiB, "
+ "instruction timing: %s, cache counters: %s).\n",
+ device->sqtt.buffer_size / (1024 * 1024),
radv_is_instruction_timing_enabled() ? "enabled" : "disabled",
radv_spm_trace_enabled() ? "enabled" : "disabled");
fail_meta:
radv_device_finish_meta(device);
fail:
- radv_thread_trace_finish(device);
+ radv_sqtt_finish(device);
radv_spm_finish(device);
radv_destroy_shader_arenas(device);
- radv_thread_trace_finish(device);
+ radv_sqtt_finish(device);
radv_rra_trace_finish(_device, &device->rra_trace);
#endif
bool
-radv_thread_trace_enabled(void)
+radv_sqtt_enabled(void)
{
return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
getenv("RADV_THREAD_TRACE_TRIGGER");
{
/* SQTT / SPM interfere with the register states for perf counters, and
* the code has only been tested on GFX10.3 */
- return pdev->rad_info.gfx_level == GFX10_3 && !radv_thread_trace_enabled();
+ return pdev->rad_info.gfx_level == GFX10_3 && !radv_sqtt_enabled();
}
static bool
.EXT_conditional_rendering = true,
.EXT_conservative_rasterization = device->rad_info.gfx_level >= GFX9,
.EXT_custom_border_color = true,
- .EXT_debug_marker = radv_thread_trace_enabled(),
+ .EXT_debug_marker = radv_sqtt_enabled(),
.EXT_depth_clip_control = true,
.EXT_depth_clip_enable = true,
.EXT_depth_range_unrestricted = true,
device->ws = radv_null_winsys_create();
#else
if (drm_device) {
- bool reserve_vmid = radv_thread_trace_enabled();
+ bool reserve_vmid = radv_sqtt_enabled();
device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags,
reserve_vmid);
void radv_physical_device_destroy(struct vk_physical_device *vk_device);
-bool radv_thread_trace_enabled(void);
+bool radv_sqtt_enabled(void);
struct radv_instance {
struct vk_instance vk;
struct radv_device_border_color_data border_color_data;
/* Thread trace. */
- struct ac_thread_trace_data thread_trace;
+ struct ac_sqtt sqtt;
/* Memory trace. */
struct radv_memory_trace_data memory_trace;
const struct radv_pipeline_key *pipeline_key,
struct radv_pipeline_stage *stages);
-bool radv_thread_trace_init(struct radv_device *device);
-void radv_thread_trace_finish(struct radv_device *device);
-bool radv_begin_thread_trace(struct radv_queue *queue);
-bool radv_end_thread_trace(struct radv_queue *queue);
-bool radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace);
-void radv_reset_thread_trace(struct radv_device *device);
-void radv_emit_thread_trace_userdata(struct radv_cmd_buffer *cmd_buffer, const void *data,
- uint32_t num_dwords);
+bool radv_sqtt_init(struct radv_device *device);
+void radv_sqtt_finish(struct radv_device *device);
+bool radv_begin_sqtt(struct radv_queue *queue);
+bool radv_end_sqtt(struct radv_queue *queue);
+bool radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace);
+void radv_reset_sqtt_trace(struct radv_device *device);
+void radv_emit_sqtt_userdata(struct radv_cmd_buffer *cmd_buffer, const void *data,
+ uint32_t num_dwords);
bool radv_is_instruction_timing_enabled(void);
-bool radv_thread_trace_sample_clocks(struct radv_device *device);
+bool radv_sqtt_sample_clocks(struct radv_device *device);
void radv_emit_inhibit_clockgating(struct radv_device *device, struct radeon_cmdbuf *cs,
bool inhibit);
}
static uint32_t
-gfx11_get_thread_trace_ctrl(struct radv_device *device, bool enable)
+gfx11_get_sqtt_ctrl(struct radv_device *device, bool enable)
{
return S_0367B0_MODE(enable) | S_0367B0_HIWATER(5) | S_0367B0_UTIL_TIMER(1) |
S_0367B0_RT_FREQ(2) | /* 4096 clk */
}
static uint32_t
-gfx10_get_thread_trace_ctrl(struct radv_device *device, bool enable)
+gfx10_get_sqtt_ctrl(struct radv_device *device, bool enable)
{
- uint32_t thread_trace_ctrl = S_008D1C_MODE(enable) | S_008D1C_HIWATER(5) |
- S_008D1C_UTIL_TIMER(1) | S_008D1C_RT_FREQ(2) | /* 4096 clk */
- S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) |
- S_008D1C_SPI_STALL_EN(1) | S_008D1C_SQ_STALL_EN(1) |
- S_008D1C_REG_DROP_ON_STALL(0);
+ uint32_t sqtt_ctrl = S_008D1C_MODE(enable) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) |
+ S_008D1C_RT_FREQ(2) | /* 4096 clk */
+ S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) |
+ S_008D1C_SPI_STALL_EN(1) | S_008D1C_SQ_STALL_EN(1) |
+ S_008D1C_REG_DROP_ON_STALL(0);
if (device->physical_device->rad_info.gfx_level == GFX10_3)
- thread_trace_ctrl |= S_008D1C_LOWATER_OFFSET(4);
+ sqtt_ctrl |= S_008D1C_LOWATER_OFFSET(4);
if (device->physical_device->rad_info.has_sqtt_auto_flush_mode_bug)
- thread_trace_ctrl |= S_008D1C_AUTO_FLUSH_MODE(1);
+ sqtt_ctrl |= S_008D1C_AUTO_FLUSH_MODE(1);
- return thread_trace_ctrl;
+ return sqtt_ctrl;
}
static void
}
static void
-radv_emit_thread_trace_start(struct radv_device *device, struct radeon_cmdbuf *cs,
- enum radv_queue_family qf)
+radv_emit_sqtt_start(struct radv_device *device, struct radeon_cmdbuf *cs,
+ enum radv_queue_family qf)
{
- uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
+ uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
struct radeon_info *rad_info = &device->physical_device->rad_info;
unsigned max_se = rad_info->max_se;
for (unsigned se = 0; se < max_se; se++) {
- uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
- uint64_t data_va = ac_thread_trace_get_data_va(rad_info, &device->thread_trace, va, se);
+ uint64_t va = radv_buffer_get_va(device->sqtt.bo);
+ uint64_t data_va = ac_sqtt_get_data_va(rad_info, &device->sqtt, va, se);
uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
S_0367B4_SA_SEL(0) | S_0367B4_WGP_SEL(first_active_cu / 2) |
S_0367B4_SIMD_SEL(0));
- uint32_t thread_trace_token_mask = S_0367B8_REG_INCLUDE(
+ uint32_t sqtt_token_mask = S_0367B8_REG_INCLUDE(
V_0367B8_REG_INCLUDE_SQDEC | V_0367B8_REG_INCLUDE_SHDEC | V_0367B8_REG_INCLUDE_GFXUDEC |
V_0367B8_REG_INCLUDE_COMP | V_0367B8_REG_INCLUDE_CONTEXT | V_0367B8_REG_INCLUDE_CONFIG);
V_0367B8_TOKEN_EXCLUDE_VALUINST | V_0367B8_TOKEN_EXCLUDE_IMMEDIATE |
V_0367B8_TOKEN_EXCLUDE_INST;
}
- thread_trace_token_mask |= S_0367B8_TOKEN_EXCLUDE(token_exclude);
+ sqtt_token_mask |= S_0367B8_TOKEN_EXCLUDE(token_exclude);
- radeon_set_uconfig_reg(cs, R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK, thread_trace_token_mask);
+ radeon_set_uconfig_reg(cs, R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK, sqtt_token_mask);
/* Should be emitted last (it enables thread traces). */
radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL,
- gfx11_get_thread_trace_ctrl(device, true));
+ gfx11_get_sqtt_ctrl(device, true));
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
/* Order seems important for the following 2 registers. */
radeon_set_privileged_config_reg(
S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
S_008D14_SA_SEL(0) | S_008D14_WGP_SEL(first_active_cu / 2) | S_008D14_SIMD_SEL(0));
- uint32_t thread_trace_token_mask = S_008D18_REG_INCLUDE(
+ uint32_t sqtt_token_mask = S_008D18_REG_INCLUDE(
V_008D18_REG_INCLUDE_SQDEC | V_008D18_REG_INCLUDE_SHDEC | V_008D18_REG_INCLUDE_GFXUDEC |
V_008D18_REG_INCLUDE_COMP | V_008D18_REG_INCLUDE_CONTEXT | V_008D18_REG_INCLUDE_CONFIG);
V_008D18_TOKEN_EXCLUDE_IMMEDIATE |
V_008D18_TOKEN_EXCLUDE_INST;
}
- thread_trace_token_mask |= S_008D18_TOKEN_EXCLUDE(token_exclude);
+ sqtt_token_mask |= S_008D18_TOKEN_EXCLUDE(token_exclude);
- radeon_set_privileged_config_reg(cs, R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
- thread_trace_token_mask);
+ radeon_set_privileged_config_reg(cs, R_008D18_SQ_THREAD_TRACE_TOKEN_MASK, sqtt_token_mask);
/* Should be emitted last (it enables thread traces). */
radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL,
- gfx10_get_thread_trace_ctrl(device, true));
+ gfx10_get_sqtt_ctrl(device, true));
} else {
/* Order seems important for the following 4 registers. */
radeon_set_uconfig_reg(cs, R_030CDC_SQ_THREAD_TRACE_BASE2,
radeon_set_uconfig_reg(cs, R_030CD4_SQ_THREAD_TRACE_CTRL, S_030CD4_RESET_BUFFER(1));
- uint32_t thread_trace_mask = S_030CC8_CU_SEL(first_active_cu) | S_030CC8_SH_SEL(0) |
- S_030CC8_SIMD_EN(0xf) | S_030CC8_VM_ID_MASK(0) |
- S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
- S_030CC8_SQ_STALL_EN(1);
+ uint32_t sqtt_mask = S_030CC8_CU_SEL(first_active_cu) | S_030CC8_SH_SEL(0) |
+ S_030CC8_SIMD_EN(0xf) | S_030CC8_VM_ID_MASK(0) |
+ S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
+ S_030CC8_SQ_STALL_EN(1);
if (device->physical_device->rad_info.gfx_level < GFX9) {
- thread_trace_mask |= S_030CC8_RANDOM_SEED(0xffff);
+ sqtt_mask |= S_030CC8_RANDOM_SEED(0xffff);
}
- radeon_set_uconfig_reg(cs, R_030CC8_SQ_THREAD_TRACE_MASK, thread_trace_mask);
+ radeon_set_uconfig_reg(cs, R_030CC8_SQ_THREAD_TRACE_MASK, sqtt_mask);
/* Trace all tokens and registers. */
radeon_set_uconfig_reg(
}
/* Enable the thread trace mode. */
- uint32_t thread_trace_mode =
+ uint32_t sqtt_mode =
S_030CD8_MASK_PS(1) | S_030CD8_MASK_VS(1) | S_030CD8_MASK_GS(1) | S_030CD8_MASK_ES(1) |
S_030CD8_MASK_HS(1) | S_030CD8_MASK_LS(1) | S_030CD8_MASK_CS(1) |
S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
if (device->physical_device->rad_info.gfx_level == GFX9) {
/* Count SQTT traffic in TCC perf counters. */
- thread_trace_mode |= S_030CD8_TC_PERF_EN(1);
+ sqtt_mode |= S_030CD8_TC_PERF_EN(1);
}
- radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE, thread_trace_mode);
+ radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE, sqtt_mode);
}
}
}
}
-static const uint32_t gfx8_thread_trace_info_regs[] = {
+static const uint32_t gfx8_sqtt_info_regs[] = {
R_030CE4_SQ_THREAD_TRACE_WPTR,
R_030CE8_SQ_THREAD_TRACE_STATUS,
R_008E40_SQ_THREAD_TRACE_CNTR,
};
-static const uint32_t gfx9_thread_trace_info_regs[] = {
+static const uint32_t gfx9_sqtt_info_regs[] = {
R_030CE4_SQ_THREAD_TRACE_WPTR,
R_030CE8_SQ_THREAD_TRACE_STATUS,
R_030CF0_SQ_THREAD_TRACE_CNTR,
};
-static const uint32_t gfx10_thread_trace_info_regs[] = {
+static const uint32_t gfx10_sqtt_info_regs[] = {
R_008D10_SQ_THREAD_TRACE_WPTR,
R_008D20_SQ_THREAD_TRACE_STATUS,
R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
};
-static const uint32_t gfx11_thread_trace_info_regs[] = {
+static const uint32_t gfx11_sqtt_info_regs[] = {
R_0367BC_SQ_THREAD_TRACE_WPTR,
R_0367D0_SQ_THREAD_TRACE_STATUS,
R_0367E8_SQ_THREAD_TRACE_DROPPED_CNTR,
};
static void
-radv_copy_thread_trace_info_regs(struct radv_device *device, struct radeon_cmdbuf *cs,
- unsigned se_index)
+radv_copy_sqtt_info_regs(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned se_index)
{
const struct radv_physical_device *pdevice = device->physical_device;
- const uint32_t *thread_trace_info_regs = NULL;
+ const uint32_t *sqtt_info_regs = NULL;
if (device->physical_device->rad_info.gfx_level >= GFX11) {
- thread_trace_info_regs = gfx11_thread_trace_info_regs;
+ sqtt_info_regs = gfx11_sqtt_info_regs;
} else if (device->physical_device->rad_info.gfx_level >= GFX10) {
- thread_trace_info_regs = gfx10_thread_trace_info_regs;
+ sqtt_info_regs = gfx10_sqtt_info_regs;
} else if (device->physical_device->rad_info.gfx_level == GFX9) {
- thread_trace_info_regs = gfx9_thread_trace_info_regs;
+ sqtt_info_regs = gfx9_sqtt_info_regs;
} else {
assert(device->physical_device->rad_info.gfx_level == GFX8);
- thread_trace_info_regs = gfx8_thread_trace_info_regs;
+ sqtt_info_regs = gfx8_sqtt_info_regs;
}
/* Get the VA where the info struct is stored for this SE. */
- uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
- uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);
+ uint64_t va = radv_buffer_get_va(device->sqtt.bo);
+ uint64_t info_va = ac_sqtt_get_info_va(va, se_index);
/* Copy back the info struct one DWORD at a time. */
for (unsigned i = 0; i < 3; i++) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, thread_trace_info_regs[i] >> 2);
+ radeon_emit(cs, sqtt_info_regs[i] >> 2);
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, (info_va + i * 4));
radeon_emit(cs, (info_va + i * 4) >> 32);
* 2) shift right by 5 bits because SQ_THREAD_TRACE_WPTR is 32-byte aligned
* 3) mask off the higher 3 bits because WPTR.OFFSET is 29 bits
*/
- uint64_t data_va =
- ac_thread_trace_get_data_va(&pdevice->rad_info, &device->thread_trace, va, se_index);
+ uint64_t data_va = ac_sqtt_get_data_va(&pdevice->rad_info, &device->sqtt, va, se_index);
uint64_t shifted_data_va = (data_va >> 5);
uint32_t init_wptr_value = shifted_data_va & 0x1fffffff;
}
static void
-radv_emit_thread_trace_stop(struct radv_device *device, struct radeon_cmdbuf *cs,
- enum radv_queue_family qf)
+radv_emit_sqtt_stop(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
unsigned max_se = device->physical_device->rad_info.max_se;
/* Disable the thread trace mode. */
radeon_set_uconfig_reg(cs, R_0367B0_SQ_THREAD_TRACE_CTRL,
- gfx11_get_thread_trace_ctrl(device, false));
+ gfx11_get_sqtt_ctrl(device, false));
/* Wait for thread trace completion. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
/* Disable the thread trace mode. */
radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL,
- gfx10_get_thread_trace_ctrl(device, false));
+ gfx10_get_sqtt_ctrl(device, false));
/* Wait for thread trace completion. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, 4); /* poll interval */
}
- radv_copy_thread_trace_info_regs(device, cs, se);
+ radv_copy_sqtt_info_regs(device, cs, se);
}
/* Restore global broadcasting. */
}
void
-radv_emit_thread_trace_userdata(struct radv_cmd_buffer *cmd_buffer, const void *data,
- uint32_t num_dwords)
+radv_emit_sqtt_userdata(struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords)
{
struct radv_device *device = cmd_buffer->device;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
}
static bool
-radv_thread_trace_init_bo(struct radv_device *device)
+radv_sqtt_init_bo(struct radv_device *device)
{
unsigned max_se = device->physical_device->rad_info.max_se;
struct radeon_winsys *ws = device->ws;
/* The buffer size and address need to be aligned in HW regs. Align the
* size as early as possible so that we do all the allocation & addressing
* correctly. */
- device->thread_trace.buffer_size =
- align64(device->thread_trace.buffer_size, 1u << SQTT_BUFFER_ALIGN_SHIFT);
+ device->sqtt.buffer_size = align64(device->sqtt.buffer_size, 1u << SQTT_BUFFER_ALIGN_SHIFT);
/* Compute total size of the thread trace BO for all SEs. */
- size = align64(sizeof(struct ac_thread_trace_info) * max_se, 1 << SQTT_BUFFER_ALIGN_SHIFT);
- size += device->thread_trace.buffer_size * (uint64_t)max_se;
+ size = align64(sizeof(struct ac_sqtt_data_info) * max_se, 1 << SQTT_BUFFER_ALIGN_SHIFT);
+ size += device->sqtt.buffer_size * (uint64_t)max_se;
struct radeon_winsys_bo *bo = NULL;
result = ws->buffer_create(
ws, size, 4096, RADEON_DOMAIN_VRAM,
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
RADV_BO_PRIORITY_SCRATCH, 0, &bo);
- device->thread_trace.bo = bo;
+ device->sqtt.bo = bo;
if (result != VK_SUCCESS)
return false;
- result = ws->buffer_make_resident(ws, device->thread_trace.bo, true);
+ result = ws->buffer_make_resident(ws, device->sqtt.bo, true);
if (result != VK_SUCCESS)
return false;
- device->thread_trace.ptr = ws->buffer_map(device->thread_trace.bo);
- if (!device->thread_trace.ptr)
+ device->sqtt.ptr = ws->buffer_map(device->sqtt.bo);
+ if (!device->sqtt.ptr)
return false;
return true;
}
static void
-radv_thread_trace_finish_bo(struct radv_device *device)
+radv_sqtt_finish_bo(struct radv_device *device)
{
struct radeon_winsys *ws = device->ws;
- if (unlikely(device->thread_trace.bo)) {
- ws->buffer_make_resident(ws, device->thread_trace.bo, false);
- ws->buffer_destroy(ws, device->thread_trace.bo);
+ if (unlikely(device->sqtt.bo)) {
+ ws->buffer_make_resident(ws, device->sqtt.bo, false);
+ ws->buffer_destroy(ws, device->sqtt.bo);
}
}
static VkResult
radv_register_queue(struct radv_device *device, struct radv_queue *queue)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct rgp_queue_info *queue_info = &thread_trace_data->rgp_queue_info;
+ struct ac_sqtt *sqtt = &device->sqtt;
+ struct rgp_queue_info *queue_info = &sqtt->rgp_queue_info;
struct rgp_queue_info_record *record;
record = malloc(sizeof(struct rgp_queue_info_record));
static void
radv_unregister_queue(struct radv_device *device, struct radv_queue *queue)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct rgp_queue_info *queue_info = &thread_trace_data->rgp_queue_info;
+ struct ac_sqtt *sqtt = &device->sqtt;
+ struct rgp_queue_info *queue_info = &sqtt->rgp_queue_info;
/* Destroy queue info record. */
simple_mtx_lock(&queue_info->lock);
}
static void
-radv_register_queues(struct radv_device *device, struct ac_thread_trace_data *thread_trace_data)
+radv_register_queues(struct radv_device *device, struct ac_sqtt *sqtt)
{
radv_register_queue(device, &device->queues[RADV_QUEUE_GENERAL][0]);
for (uint32_t i = 0; i < device->queue_count[RADV_QUEUE_COMPUTE]; i++)
}
static void
-radv_unregister_queues(struct radv_device *device, struct ac_thread_trace_data *thread_trace_data)
+radv_unregister_queues(struct radv_device *device, struct ac_sqtt *sqtt)
{
radv_unregister_queue(device, &device->queues[RADV_QUEUE_GENERAL][0]);
for (uint32_t i = 0; i < device->queue_count[RADV_QUEUE_COMPUTE]; i++)
}
bool
-radv_thread_trace_init(struct radv_device *device)
+radv_sqtt_init(struct radv_device *device)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
+ struct ac_sqtt *sqtt = &device->sqtt;
/* Default buffer size set to 32MB per SE. */
- device->thread_trace.buffer_size =
+ device->sqtt.buffer_size =
radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024);
- device->thread_trace.start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
+ device->sqtt.start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
if (trigger_file)
- device->thread_trace.trigger_file = strdup(trigger_file);
+ device->sqtt.trigger_file = strdup(trigger_file);
- if (!radv_thread_trace_init_bo(device))
+ if (!radv_sqtt_init_bo(device))
return false;
if (!radv_device_acquire_performance_counters(device))
return false;
- ac_thread_trace_init(thread_trace_data);
+ ac_sqtt_init(sqtt);
- radv_register_queues(device, thread_trace_data);
+ radv_register_queues(device, sqtt);
return true;
}
void
-radv_thread_trace_finish(struct radv_device *device)
+radv_sqtt_finish(struct radv_device *device)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
+ struct ac_sqtt *sqtt = &device->sqtt;
struct radeon_winsys *ws = device->ws;
- free(device->thread_trace.trigger_file);
+ free(device->sqtt.trigger_file);
- radv_thread_trace_finish_bo(device);
+ radv_sqtt_finish_bo(device);
for (unsigned i = 0; i < 2; i++) {
- if (device->thread_trace.start_cs[i])
- ws->cs_destroy(device->thread_trace.start_cs[i]);
- if (device->thread_trace.stop_cs[i])
- ws->cs_destroy(device->thread_trace.stop_cs[i]);
+ if (device->sqtt.start_cs[i])
+ ws->cs_destroy(device->sqtt.start_cs[i]);
+ if (device->sqtt.stop_cs[i])
+ ws->cs_destroy(device->sqtt.stop_cs[i]);
}
- radv_unregister_queues(device, thread_trace_data);
+ radv_unregister_queues(device, sqtt);
- ac_thread_trace_finish(thread_trace_data);
+ ac_sqtt_finish(sqtt);
}
static bool
-radv_thread_trace_resize_bo(struct radv_device *device)
+radv_sqtt_resize_bo(struct radv_device *device)
{
/* Destroy the previous thread trace BO. */
- radv_thread_trace_finish_bo(device);
+ radv_sqtt_finish_bo(device);
/* Double the size of the thread trace buffer per SE. */
- device->thread_trace.buffer_size *= 2;
+ device->sqtt.buffer_size *= 2;
fprintf(stderr,
"Failed to get the thread trace because the buffer "
"was too small, resizing to %d KB\n",
- device->thread_trace.buffer_size / 1024);
+ device->sqtt.buffer_size / 1024);
/* Re-create the thread trace BO. */
- return radv_thread_trace_init_bo(device);
+ return radv_sqtt_init_bo(device);
}
bool
-radv_begin_thread_trace(struct radv_queue *queue)
+radv_begin_sqtt(struct radv_queue *queue)
{
struct radv_device *device = queue->device;
enum radv_queue_family family = queue->state.qf;
VkResult result;
/* Destroy the previous start CS and create a new one. */
- if (device->thread_trace.start_cs[family]) {
- ws->cs_destroy(device->thread_trace.start_cs[family]);
- device->thread_trace.start_cs[family] = NULL;
+ if (device->sqtt.start_cs[family]) {
+ ws->cs_destroy(device->sqtt.start_cs[family]);
+ device->sqtt.start_cs[family] = NULL;
}
cs = ws->cs_create(ws, radv_queue_ring(queue), false);
}
/* Start SQTT. */
- radv_emit_thread_trace_start(device, cs, family);
+ radv_emit_sqtt_start(device, cs, family);
if (device->spm.bo)
radv_perfcounter_emit_spm_start(device, cs, family);
return false;
}
- device->thread_trace.start_cs[family] = cs;
+ device->sqtt.start_cs[family] = cs;
return radv_queue_internal_submit(queue, cs);
}
bool
-radv_end_thread_trace(struct radv_queue *queue)
+radv_end_sqtt(struct radv_queue *queue)
{
struct radv_device *device = queue->device;
enum radv_queue_family family = queue->state.qf;
VkResult result;
/* Destroy the previous stop CS and create a new one. */
- if (queue->device->thread_trace.stop_cs[family]) {
- ws->cs_destroy(device->thread_trace.stop_cs[family]);
- device->thread_trace.stop_cs[family] = NULL;
+ if (queue->device->sqtt.stop_cs[family]) {
+ ws->cs_destroy(device->sqtt.stop_cs[family]);
+ device->sqtt.stop_cs[family] = NULL;
}
cs = ws->cs_create(ws, radv_queue_ring(queue), false);
radv_perfcounter_emit_spm_stop(device, cs, family);
/* Stop SQTT. */
- radv_emit_thread_trace_stop(device, cs, family);
+ radv_emit_sqtt_stop(device, cs, family);
radv_perfcounter_emit_spm_reset(cs);
return false;
}
- device->thread_trace.stop_cs[family] = cs;
+ device->sqtt.stop_cs[family] = cs;
return radv_queue_internal_submit(queue, cs);
}
bool
-radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace)
+radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace)
{
struct radv_device *device = queue->device;
struct radeon_info *rad_info = &device->physical_device->rad_info;
- if (!ac_sqtt_get_trace(&device->thread_trace, rad_info, thread_trace)) {
- if (!radv_thread_trace_resize_bo(device))
+ if (!ac_sqtt_get_trace(&device->sqtt, rad_info, sqtt_trace)) {
+ if (!radv_sqtt_resize_bo(device))
fprintf(stderr, "radv: Failed to resize the SQTT buffer.\n");
return false;
}
}
void
-radv_reset_thread_trace(struct radv_device *device)
+radv_reset_sqtt_trace(struct radv_device *device)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct rgp_clock_calibration *clock_calibration = &thread_trace_data->rgp_clock_calibration;
+ struct ac_sqtt *sqtt = &device->sqtt;
+ struct rgp_clock_calibration *clock_calibration = &sqtt->rgp_clock_calibration;
/* Clear clock calibration records. */
simple_mtx_lock(&clock_calibration->lock);
}
bool
-radv_thread_trace_sample_clocks(struct radv_device *device)
+radv_sqtt_sample_clocks(struct radv_device *device)
{
uint64_t cpu_timestamp = 0, gpu_timestamp = 0;
VkResult result;
if (result != VK_SUCCESS)
return false;
- return ac_sqtt_add_clock_calibration(&device->thread_trace, cpu_timestamp, gpu_timestamp);
+ return ac_sqtt_add_clock_calibration(&device->sqtt, cpu_timestamp, gpu_timestamp);
}
simple_mtx_unlock(&sscreen->async_compute_context_lock);
}
- if (unlikely(sctx->thread_trace_enabled))
+ if (unlikely(sctx->sqtt_enabled))
sctx->sqtt_next_event = EventCmdResolveImage;
if (si_msaa_resolve_blit_via_CB(ctx, info))
return;
- if (unlikely(sctx->thread_trace_enabled))
+ if (unlikely(sctx->sqtt_enabled))
sctx->sqtt_next_event = EventCmdCopyImage;
/* Using compute for copying to a linear texture in GTT is much faster than
info->src.box.z, info->src.box.z + info->src.box.depth - 1,
false);
- if (unlikely(sctx->thread_trace_enabled))
+ if (unlikely(sctx->sqtt_enabled))
sctx->sqtt_next_event = EventCmdBlitImage;
si_blitter_begin(sctx, SI_BLIT | (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
}
- if (unlikely(sctx->thread_trace_enabled)) {
+ if (unlikely(sctx->sqtt_enabled)) {
if (buffers & PIPE_CLEAR_COLOR)
sctx->sqtt_next_event = EventCmdClearColorImage;
else if (buffers & PIPE_CLEAR_DEPTHSTENCIL)
sctx->compute_shaderbuf_sgprs_dirty = true;
sctx->compute_image_sgprs_dirty = true;
- if (unlikely((sctx->screen->debug_flags & DBG(SQTT)) && sctx->thread_trace)) {
+ if (unlikely((sctx->screen->debug_flags & DBG(SQTT)) && sctx->sqtt)) {
uint32_t pipeline_code_hash = _mesa_hash_data_with_seed(
program->shader.binary.elf_buffer,
program->shader.binary.elf_size,
0);
- struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
- if (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_code_hash)) {
+ if (!si_sqtt_pipeline_is_registered(sctx->sqtt, pipeline_code_hash)) {
/* Short lived fake pipeline: we don't need to reupload the compute shaders,
* as we do for the gfx ones so just create a temp pipeline to be able to
* call si_sqtt_register_pipeline, and then drop it.
if (sctx->gfx_level >= GFX10 && waves_per_threadgroup == 1)
threadgroups_per_cu = 2;
- if (unlikely(sctx->thread_trace_enabled)) {
+ if (unlikely(sctx->sqtt_enabled)) {
si_write_event_with_dims_marker(sctx, &sctx->gfx_cs,
info->indirect ? EventCmdDispatchIndirect : EventCmdDispatch,
info->grid[0], info->grid[1], info->grid[2]);
radeon_emit(dispatch_initiator);
}
- if (unlikely(sctx->thread_trace_enabled && sctx->gfx_level >= GFX9)) {
+ if (unlikely(sctx->sqtt_enabled && sctx->gfx_level >= GFX9)) {
radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
}
tc_driver_internal_flush_notify(sctx->tc);
- if (unlikely(sctx->thread_trace &&
- (flags & PIPE_FLUSH_END_OF_FRAME))) {
- si_handle_thread_trace(sctx, &sctx->gfx_cs);
+ if (unlikely(sctx->sqtt && (flags & PIPE_FLUSH_END_OF_FRAME))) {
+ si_handle_sqtt(sctx, &sctx->gfx_cs);
}
} else {
/* Instead of flushing, create a deferred fence. Constraints:
si_check_vm_faults(ctx, &ctx->current_saved_cs->gfx, AMD_IP_GFX);
}
- if (unlikely(ctx->thread_trace &&
- (flags & PIPE_FLUSH_END_OF_FRAME))) {
- si_handle_thread_trace(ctx, &ctx->gfx_cs);
+ if (unlikely(ctx->sqtt && (flags & PIPE_FLUSH_END_OF_FRAME))) {
+ si_handle_sqtt(ctx, &ctx->gfx_cs);
}
if (ctx->current_saved_cs)
radeon_emit(0); /* DATA_HI */
radeon_emit(0); /* INT_CTXID */
- if (unlikely(ctx->thread_trace_enabled)) {
+ if (unlikely(ctx->sqtt_enabled)) {
radeon_end();
si_sqtt_describe_barrier_start(ctx, &ctx->gfx_cs);
radeon_begin_again(cs);
radeon_emit(S_585_PWS_ENA(1));
radeon_emit(gcr_cntl); /* GCR_CNTL */
- if (unlikely(ctx->thread_trace_enabled)) {
+ if (unlikely(ctx->sqtt_enabled)) {
radeon_end();
si_sqtt_describe_barrier_end(ctx, &ctx->gfx_cs, flags);
radeon_begin_again(cs);
EOP_DATA_SEL_VALUE_32BIT, wait_mem_scratch, va, ctx->wait_mem_number,
SI_NOT_QUERY);
- if (unlikely(ctx->thread_trace_enabled)) {
+ if (unlikely(ctx->sqtt_enabled)) {
si_sqtt_describe_barrier_start(ctx, &ctx->gfx_cs);
}
si_cp_wait_mem(ctx, cs, va, ctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL);
- if (unlikely(ctx->thread_trace_enabled)) {
+ if (unlikely(ctx->sqtt_enabled)) {
si_sqtt_describe_barrier_end(ctx, &ctx->gfx_cs, flags);
}
EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM, EOP_DATA_SEL_VALUE_32BIT,
wait_mem_scratch, va, sctx->wait_mem_number, SI_NOT_QUERY);
- if (unlikely(sctx->thread_trace_enabled)) {
+ if (unlikely(sctx->sqtt_enabled)) {
si_sqtt_describe_barrier_start(sctx, &sctx->gfx_cs);
}
si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff, WAIT_REG_MEM_EQUAL);
- if (unlikely(sctx->thread_trace_enabled)) {
+ if (unlikely(sctx->sqtt_enabled)) {
si_sqtt_describe_barrier_end(sctx, &sctx->gfx_cs, sctx->flags);
}
}
if (sctx->gfx_level >= GFX10 && sctx->has_graphics)
gfx10_destroy_query(sctx);
- if (sctx->thread_trace) {
+ if (sctx->sqtt) {
struct si_screen *sscreen = sctx->screen;
if (sscreen->info.has_stable_pstate && sscreen->b.num_contexts == 1 &&
!(sctx->context_flags & SI_CONTEXT_FLAG_AUX))
sscreen->ws->cs_set_pstate(&sctx->gfx_cs, RADEON_CTX_PSTATE_NONE);
- si_destroy_thread_trace(sctx);
+ si_destroy_sqtt(sctx);
}
pipe_resource_reference(&sctx->esgs_ring, NULL);
dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number);
- if (sctx->thread_trace_enabled)
+ if (sctx->sqtt_enabled)
si_write_user_event(sctx, &sctx->gfx_cs, UserEventTrigger, string, len);
if (sctx->log)
"detected. Force the GPU into a profiling mode with e.g. "
"\"echo profile_peak > "
"/sys/class/drm/card0/device/power_dpm_force_performance_level\"\n");
- } else if (!si_init_thread_trace((struct si_context *)ctx)) {
+ } else if (!si_init_sqtt((struct si_context *)ctx)) {
FREE(ctx);
return NULL;
}
void (*emit_spi_map[33])(struct si_context *sctx);
/* SQTT */
- struct ac_thread_trace_data *thread_trace;
+ struct ac_sqtt *sqtt;
struct ac_spm spm;
struct pipe_fence_handle *last_sqtt_fence;
enum rgp_sqtt_marker_event_type sqtt_next_event;
- bool thread_trace_enabled;
+ bool sqtt_enabled;
unsigned context_flags;
uint32_t instance_offset_user_data,
uint32_t draw_index_user_data);
bool si_sqtt_register_pipeline(struct si_context* sctx, struct si_sqtt_fake_pipeline *pipeline, bool is_compute);
-bool si_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data,
+bool si_sqtt_pipeline_is_registered(struct ac_sqtt *sqtt,
uint64_t pipeline_hash);
void si_sqtt_describe_pipeline_bind(struct si_context* sctx, uint64_t pipeline_hash, int bind_point);
void
si_sqtt_describe_barrier_start(struct si_context* sctx, struct radeon_cmdbuf *rcs);
void
si_sqtt_describe_barrier_end(struct si_context* sctx, struct radeon_cmdbuf *rcs, unsigned flags);
-bool si_init_thread_trace(struct si_context *sctx);
-void si_destroy_thread_trace(struct si_context *sctx);
-void si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs);
+bool si_init_sqtt(struct si_context *sctx);
+void si_destroy_sqtt(struct si_context *sctx);
+void si_handle_sqtt(struct si_context *sctx, struct radeon_cmdbuf *rcs);
/*
* common helpers
si_emit_spi_config_cntl(struct si_context* sctx,
struct radeon_cmdbuf *cs, bool enable);
-static bool
-si_thread_trace_init_bo(struct si_context *sctx)
-{
- unsigned max_se = sctx->screen->info.max_se;
- struct radeon_winsys *ws = sctx->ws;
- uint64_t size;
-
- /* The buffer size and address need to be aligned in HW regs. Align the
- * size as early as possible so that we do all the allocation & addressing
- * correctly. */
- sctx->thread_trace->buffer_size = align64(sctx->thread_trace->buffer_size,
- 1u << SQTT_BUFFER_ALIGN_SHIFT);
-
- /* Compute total size of the thread trace BO for all SEs. */
- size = align64(sizeof(struct ac_thread_trace_info) * max_se,
- 1 << SQTT_BUFFER_ALIGN_SHIFT);
- size += sctx->thread_trace->buffer_size * (uint64_t)max_se;
-
- sctx->thread_trace->pipeline_bos = _mesa_hash_table_u64_create(NULL);
-
- sctx->thread_trace->bo =
- ws->buffer_create(ws, size, 4096,
- RADEON_DOMAIN_VRAM,
+static bool si_sqtt_init_bo(struct si_context *sctx) {
+ unsigned max_se = sctx->screen->info.max_se;
+ struct radeon_winsys *ws = sctx->ws;
+ uint64_t size;
+
+ /* The buffer size and address need to be aligned in HW regs. Align the
+ * size as early as possible so that we do all the allocation & addressing
+ * correctly. */
+ sctx->sqtt->buffer_size =
+ align64(sctx->sqtt->buffer_size, 1u << SQTT_BUFFER_ALIGN_SHIFT);
+
+ /* Compute total size of the thread trace BO for all SEs. */
+ size = align64(sizeof(struct ac_sqtt_data_info) * max_se,
+ 1 << SQTT_BUFFER_ALIGN_SHIFT);
+ size += sctx->sqtt->buffer_size * (uint64_t)max_se;
+
+ sctx->sqtt->pipeline_bos = _mesa_hash_table_u64_create(NULL);
+
+ sctx->sqtt->bo =
+ ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM,
RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_GTT_WC |
- RADEON_FLAG_NO_SUBALLOC);
- if (!sctx->thread_trace->bo)
- return false;
+ RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_SUBALLOC);
+ if (!sctx->sqtt->bo)
+ return false;
- return true;
+ return true;
}
-static void
-si_emit_thread_trace_start(struct si_context* sctx,
- struct radeon_cmdbuf *cs,
- uint32_t queue_family_index)
-{
- struct si_screen *sscreen = sctx->screen;
- uint32_t shifted_size = sctx->thread_trace->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
- unsigned max_se = sscreen->info.max_se;
-
- radeon_begin(cs);
-
- for (unsigned se = 0; se < max_se; se++) {
- uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
- uint64_t data_va = ac_thread_trace_get_data_va(&sctx->screen->info, sctx->thread_trace, va, se);
- uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
-
- if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
- continue;
-
- /* Target SEx and SH0. */
- radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) |
- S_030800_SH_INDEX(0) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
- /* Select the first active CUs */
- int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
-
- if (sctx->gfx_level >= GFX10) {
- uint32_t token_mask = V_008D18_REG_INCLUDE_SQDEC |
- V_008D18_REG_INCLUDE_SHDEC |
- V_008D18_REG_INCLUDE_GFXUDEC |
- V_008D18_REG_INCLUDE_CONTEXT |
- V_008D18_REG_INCLUDE_COMP |
- V_008D18_REG_INCLUDE_CONFIG;
- int wgp = first_active_cu / 2;
- unsigned shader_mask = 0x7f; /* all shader stages */
-
- /* Order seems important for the following 2 registers. */
- if (sctx->gfx_level >= GFX11) {
- /* Disable unsupported hw shader stages */
- shader_mask &= ~(0x02 /* VS */ | 0x08 /* ES */ | 0x20 /* LS */);
-
- radeon_set_uconfig_reg(R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
- S_0367A4_SIZE(shifted_size) |
- S_0367A4_BASE_HI(shifted_va >> 32));
-
- radeon_set_uconfig_reg(R_0367A0_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
-
- radeon_set_uconfig_reg(R_0367B4_SQ_THREAD_TRACE_MASK,
- S_0367B4_WTYPE_INCLUDE(shader_mask) |
- S_0367B4_SA_SEL(0) |
- S_0367B4_WGP_SEL(wgp) |
- S_0367B4_SIMD_SEL(0));
-
- radeon_set_uconfig_reg(R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK,
- S_0367B8_REG_INCLUDE(token_mask) |
- S_0367B8_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
- } else {
- radeon_set_privileged_config_reg(R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
- S_008D04_SIZE(shifted_size) |
- S_008D04_BASE_HI(shifted_va >> 32));
-
- radeon_set_privileged_config_reg(R_008D00_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
-
- radeon_set_privileged_config_reg(R_008D14_SQ_THREAD_TRACE_MASK,
- S_008D14_WTYPE_INCLUDE(shader_mask) |
- S_008D14_SA_SEL(0) |
- S_008D14_WGP_SEL(wgp) |
- S_008D14_SIMD_SEL(0));
-
- radeon_set_privileged_config_reg(R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
- S_008D18_REG_INCLUDE(token_mask) |
- S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
- }
-
- /* Should be emitted last (it enables thread traces). */
- uint32_t ctrl = S_008D1C_MODE(1) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) |
- S_008D1C_RT_FREQ(2) | /* 4096 clk */S_008D1C_DRAW_EVENT_EN(1);
-
- if (sctx->gfx_level == GFX10_3)
- ctrl |= S_008D1C_LOWATER_OFFSET(4);
-
- ctrl |= S_008D1C_AUTO_FLUSH_MODE(sctx->screen->info.has_sqtt_auto_flush_mode_bug);
-
- switch (sctx->gfx_level) {
- case GFX10:
- case GFX10_3:
- ctrl |= S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) |
- S_008D1C_SQ_STALL_EN(1) |S_008D1C_REG_DROP_ON_STALL(0);
- radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL, ctrl);
- break;
- case GFX11:
- ctrl |= S_0367B0_SPI_STALL_EN(1) | S_0367B0_SQ_STALL_EN(1) |
- S_0367B0_REG_AT_HWM(2);
- radeon_set_uconfig_reg(R_0367B0_SQ_THREAD_TRACE_CTRL, ctrl);
- break;
- default:
- assert(false);
- }
+static void si_emit_sqtt_start(struct si_context *sctx,
+ struct radeon_cmdbuf *cs,
+ uint32_t queue_family_index) {
+ struct si_screen *sscreen = sctx->screen;
+ uint32_t shifted_size = sctx->sqtt->buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
+ unsigned max_se = sscreen->info.max_se;
+
+ radeon_begin(cs);
+
+ for (unsigned se = 0; se < max_se; se++) {
+ uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->sqtt->bo);
+ uint64_t data_va =
+ ac_sqtt_get_data_va(&sctx->screen->info, sctx->sqtt, va, se);
+ uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
+
+ if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
+ continue;
+
+ /* Target SEx and SH0. */
+ radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ /* Select the first active CUs */
+ int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
+
+ if (sctx->gfx_level >= GFX10) {
+ uint32_t token_mask =
+ V_008D18_REG_INCLUDE_SQDEC | V_008D18_REG_INCLUDE_SHDEC |
+ V_008D18_REG_INCLUDE_GFXUDEC | V_008D18_REG_INCLUDE_CONTEXT |
+ V_008D18_REG_INCLUDE_COMP | V_008D18_REG_INCLUDE_CONFIG;
+ int wgp = first_active_cu / 2;
+ unsigned shader_mask = 0x7f; /* all shader stages */
+
+ /* Order seems important for the following 2 registers. */
+ if (sctx->gfx_level >= GFX11) {
+ /* Disable unsupported hw shader stages */
+ shader_mask &= ~(0x02 /* VS */ | 0x08 /* ES */ | 0x20 /* LS */);
+
+ radeon_set_uconfig_reg(R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
+ S_0367A4_SIZE(shifted_size) |
+ S_0367A4_BASE_HI(shifted_va >> 32));
+
+ radeon_set_uconfig_reg(R_0367A0_SQ_THREAD_TRACE_BUF0_BASE, shifted_va);
+
+ radeon_set_uconfig_reg(R_0367B4_SQ_THREAD_TRACE_MASK,
+ S_0367B4_WTYPE_INCLUDE(shader_mask) |
+ S_0367B4_SA_SEL(0) | S_0367B4_WGP_SEL(wgp) |
+ S_0367B4_SIMD_SEL(0));
+
+ radeon_set_uconfig_reg(
+ R_0367B8_SQ_THREAD_TRACE_TOKEN_MASK,
+ S_0367B8_REG_INCLUDE(token_mask) |
+ S_0367B8_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
} else {
- /* Order seems important for the following 4 registers. */
- radeon_set_uconfig_reg(R_030CDC_SQ_THREAD_TRACE_BASE2,
- S_030CDC_ADDR_HI(shifted_va >> 32));
-
- radeon_set_uconfig_reg(R_030CC0_SQ_THREAD_TRACE_BASE, shifted_va);
-
- radeon_set_uconfig_reg(R_030CC4_SQ_THREAD_TRACE_SIZE,
- S_030CC4_SIZE(shifted_size));
-
- radeon_set_uconfig_reg(R_030CD4_SQ_THREAD_TRACE_CTRL,
- S_030CD4_RESET_BUFFER(1));
-
- uint32_t thread_trace_mask = S_030CC8_CU_SEL(first_active_cu) |
- S_030CC8_SH_SEL(0) |
- S_030CC8_SIMD_EN(0xf) |
- S_030CC8_VM_ID_MASK(0) |
- S_030CC8_REG_STALL_EN(1) |
- S_030CC8_SPI_STALL_EN(1) |
- S_030CC8_SQ_STALL_EN(1);
-
- radeon_set_uconfig_reg(R_030CC8_SQ_THREAD_TRACE_MASK,
- thread_trace_mask);
-
- /* Trace all tokens and registers. */
- radeon_set_uconfig_reg(R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
- S_030CCC_TOKEN_MASK(0xbfff) |
- S_030CCC_REG_MASK(0xff) |
- S_030CCC_REG_DROP_ON_STALL(0));
-
- /* Enable SQTT perf counters for all CUs. */
- radeon_set_uconfig_reg(R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
- S_030CD0_SH0_MASK(0xffff) |
- S_030CD0_SH1_MASK(0xffff));
-
- radeon_set_uconfig_reg(R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2, 0xffffffff);
-
- radeon_set_uconfig_reg(R_030CEC_SQ_THREAD_TRACE_HIWATER,
- S_030CEC_HIWATER(4));
-
- if (sctx->gfx_level == GFX9) {
- /* Reset thread trace status errors. */
- radeon_set_uconfig_reg(R_030CE8_SQ_THREAD_TRACE_STATUS,
- S_030CE8_UTC_ERROR(0));
- }
-
- /* Enable the thread trace mode. */
- uint32_t thread_trace_mode =
- S_030CD8_MASK_PS(1) |
- S_030CD8_MASK_VS(1) |
- S_030CD8_MASK_GS(1) |
- S_030CD8_MASK_ES(1) |
- S_030CD8_MASK_HS(1) |
- S_030CD8_MASK_LS(1) |
- S_030CD8_MASK_CS(1) |
- S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
- S_030CD8_MODE(1);
-
- if (sctx->gfx_level == GFX9) {
- /* Count SQTT traffic in TCC perf counters. */
- thread_trace_mode |= S_030CD8_TC_PERF_EN(1);
- }
-
- radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE,
- thread_trace_mode);
+ radeon_set_privileged_config_reg(
+ R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
+ S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32));
+
+ radeon_set_privileged_config_reg(R_008D00_SQ_THREAD_TRACE_BUF0_BASE,
+ shifted_va);
+
+ radeon_set_privileged_config_reg(
+ R_008D14_SQ_THREAD_TRACE_MASK,
+ S_008D14_WTYPE_INCLUDE(shader_mask) | S_008D14_SA_SEL(0) |
+ S_008D14_WGP_SEL(wgp) | S_008D14_SIMD_SEL(0));
+
+ radeon_set_privileged_config_reg(
+ R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
+ S_008D18_REG_INCLUDE(token_mask) |
+ S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF));
}
- }
-
- /* Restore global broadcasting. */
- radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) |
- S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
- /* Start the thread trace with a different event based on the queue. */
- if (queue_family_index == AMD_IP_COMPUTE) {
- radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
- S_00B878_THREAD_TRACE_ENABLE(1));
- } else {
- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
- }
- radeon_end();
-}
-static const uint32_t gfx9_thread_trace_info_regs[] =
-{
- R_030CE4_SQ_THREAD_TRACE_WPTR,
- R_030CE8_SQ_THREAD_TRACE_STATUS,
- R_030CF0_SQ_THREAD_TRACE_CNTR,
-};
+ /* Should be emitted last (it enables thread traces). */
+ uint32_t ctrl = S_008D1C_MODE(1) | S_008D1C_HIWATER(5) |
+ S_008D1C_UTIL_TIMER(1) | S_008D1C_RT_FREQ(2) |
+ /* 4096 clk */ S_008D1C_DRAW_EVENT_EN(1);
+
+ if (sctx->gfx_level == GFX10_3)
+ ctrl |= S_008D1C_LOWATER_OFFSET(4);
+
+ ctrl |= S_008D1C_AUTO_FLUSH_MODE(
+ sctx->screen->info.has_sqtt_auto_flush_mode_bug);
+
+ switch (sctx->gfx_level) {
+ case GFX10:
+ case GFX10_3:
+ ctrl |= S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) |
+ S_008D1C_SQ_STALL_EN(1) | S_008D1C_REG_DROP_ON_STALL(0);
+ radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL, ctrl);
+ break;
+ case GFX11:
+ ctrl |= S_0367B0_SPI_STALL_EN(1) | S_0367B0_SQ_STALL_EN(1) |
+ S_0367B0_REG_AT_HWM(2);
+ radeon_set_uconfig_reg(R_0367B0_SQ_THREAD_TRACE_CTRL, ctrl);
+ break;
+ default:
+ assert(false);
+ }
+ } else {
+ /* Order seems important for the following 4 registers. */
+ radeon_set_uconfig_reg(R_030CDC_SQ_THREAD_TRACE_BASE2,
+ S_030CDC_ADDR_HI(shifted_va >> 32));
-static const uint32_t gfx10_thread_trace_info_regs[] =
-{
- R_008D10_SQ_THREAD_TRACE_WPTR,
- R_008D20_SQ_THREAD_TRACE_STATUS,
- R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
-};
+ radeon_set_uconfig_reg(R_030CC0_SQ_THREAD_TRACE_BASE, shifted_va);
-static const uint32_t gfx11_thread_trace_info_regs[] =
-{
- R_0367BC_SQ_THREAD_TRACE_WPTR,
- R_0367D0_SQ_THREAD_TRACE_STATUS,
- R_0367E8_SQ_THREAD_TRACE_DROPPED_CNTR,
-};
+ radeon_set_uconfig_reg(R_030CC4_SQ_THREAD_TRACE_SIZE,
+ S_030CC4_SIZE(shifted_size));
+ radeon_set_uconfig_reg(R_030CD4_SQ_THREAD_TRACE_CTRL,
+ S_030CD4_RESET_BUFFER(1));
-static void
-si_copy_thread_trace_info_regs(struct si_context* sctx,
- struct radeon_cmdbuf *cs,
- unsigned se_index)
-{
- const uint32_t *thread_trace_info_regs = NULL;
-
- switch (sctx->gfx_level) {
- case GFX10_3:
- case GFX10:
- thread_trace_info_regs = gfx10_thread_trace_info_regs;
- break;
- case GFX11:
- thread_trace_info_regs = gfx11_thread_trace_info_regs;
- break;
- case GFX9:
- thread_trace_info_regs = gfx9_thread_trace_info_regs;
- break;
- default:
- unreachable("Unsupported gfx_level");
- }
+ uint32_t sqtt_mask = S_030CC8_CU_SEL(first_active_cu) |
+ S_030CC8_SH_SEL(0) | S_030CC8_SIMD_EN(0xf) |
+ S_030CC8_VM_ID_MASK(0) | S_030CC8_REG_STALL_EN(1) |
+ S_030CC8_SPI_STALL_EN(1) | S_030CC8_SQ_STALL_EN(1);
- /* Get the VA where the info struct is stored for this SE. */
- uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo);
- uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);
+ radeon_set_uconfig_reg(R_030CC8_SQ_THREAD_TRACE_MASK, sqtt_mask);
- radeon_begin(cs);
+ /* Trace all tokens and registers. */
+ radeon_set_uconfig_reg(R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
+ S_030CCC_TOKEN_MASK(0xbfff) |
+ S_030CCC_REG_MASK(0xff) |
+ S_030CCC_REG_DROP_ON_STALL(0));
- /* Copy back the info struct one DWORD at a time. */
- for (unsigned i = 0; i < 3; i++) {
- radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
- COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(thread_trace_info_regs[i] >> 2);
- radeon_emit(0); /* unused */
- radeon_emit((info_va + i * 4));
- radeon_emit((info_va + i * 4) >> 32);
- }
+ /* Enable SQTT perf counters for all CUs. */
+ radeon_set_uconfig_reg(R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
+ S_030CD0_SH0_MASK(0xffff) |
+ S_030CD0_SH1_MASK(0xffff));
- if (sctx->gfx_level == GFX11) {
- /* On GFX11, WPTR is incremented from the offset of the current buffer base address and it
- * needs to be subtracted to get the correct offset:
- *
- * 1) get the current buffer base address for this SE
- * 2) shift right by 5 bits because SQ_THREAD_TRACE_WPTR is 32-byte aligned
- * 3) mask off the higher 3 bits because WPTR.OFFSET is 29 bits
- */
- uint64_t data_va =
- ac_thread_trace_get_data_va(&sctx->screen->info, sctx->thread_trace, va, se_index);
- uint64_t shifted_data_va = (data_va >> 5);
- uint64_t init_wptr_value = shifted_data_va & 0x1fffffff;
-
- radeon_emit(PKT3(PKT3_ATOMIC_MEM, 7, 0));
- radeon_emit(ATOMIC_OP(TC_OP_ATOMIC_SUB_32));
- radeon_emit(info_va);
- radeon_emit(info_va >> 32);
- radeon_emit(init_wptr_value);
- radeon_emit(init_wptr_value >> 32);
- radeon_emit(0);
- radeon_emit(0);
- radeon_emit(0);
- }
-
- radeon_end();
-}
+ radeon_set_uconfig_reg(R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2, 0xffffffff);
+ radeon_set_uconfig_reg(R_030CEC_SQ_THREAD_TRACE_HIWATER,
+ S_030CEC_HIWATER(4));
+ if (sctx->gfx_level == GFX9) {
+ /* Reset thread trace status errors. */
+ radeon_set_uconfig_reg(R_030CE8_SQ_THREAD_TRACE_STATUS,
+ S_030CE8_UTC_ERROR(0));
+ }
-static void
-si_emit_thread_trace_stop(struct si_context *sctx,
- struct radeon_cmdbuf *cs,
- uint32_t queue_family_index)
-{
- unsigned max_se = sctx->screen->info.max_se;
+ /* Enable the thread trace mode. */
+ uint32_t sqtt_mode = S_030CD8_MASK_PS(1) | S_030CD8_MASK_VS(1) |
+ S_030CD8_MASK_GS(1) | S_030CD8_MASK_ES(1) |
+ S_030CD8_MASK_HS(1) | S_030CD8_MASK_LS(1) |
+ S_030CD8_MASK_CS(1) |
+ S_030CD8_AUTOFLUSH_EN(
+ 1) | /* periodically flush SQTT data to memory */
+ S_030CD8_MODE(1);
+
+ if (sctx->gfx_level == GFX9) {
+ /* Count SQTT traffic in TCC perf counters. */
+ sqtt_mode |= S_030CD8_TC_PERF_EN(1);
+ }
- radeon_begin(cs);
+ radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE, sqtt_mode);
+ }
+ }
- /* Stop the thread trace with a different event based on the queue. */
- if (queue_family_index == AMD_IP_COMPUTE) {
- radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
- S_00B878_THREAD_TRACE_ENABLE(0));
- } else {
- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
- }
+ /* Restore global broadcasting. */
+ radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) |
+ S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
- radeon_end();
+ /* Start the thread trace with a different event based on the queue. */
+ if (queue_family_index == AMD_IP_COMPUTE) {
+ radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
+ S_00B878_THREAD_TRACE_ENABLE(1));
+ } else {
+ radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
+ }
+ radeon_end();
+}
- if (sctx->screen->info.has_sqtt_rb_harvest_bug) {
- /* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */
- sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB |
- SI_CONTEXT_CS_PARTIAL_FLUSH;
- sctx->emit_cache_flush(sctx, cs);
- }
+static const uint32_t gfx9_sqtt_info_regs[] = {
+ R_030CE4_SQ_THREAD_TRACE_WPTR,
+ R_030CE8_SQ_THREAD_TRACE_STATUS,
+ R_030CF0_SQ_THREAD_TRACE_CNTR,
+};
- for (unsigned se = 0; se < max_se; se++) {
- if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
- continue;
+static const uint32_t gfx10_sqtt_info_regs[] = {
+ R_008D10_SQ_THREAD_TRACE_WPTR,
+ R_008D20_SQ_THREAD_TRACE_STATUS,
+ R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
+};
- radeon_begin(cs);
+static const uint32_t gfx11_sqtt_info_regs[] = {
+ R_0367BC_SQ_THREAD_TRACE_WPTR,
+ R_0367D0_SQ_THREAD_TRACE_STATUS,
+ R_0367E8_SQ_THREAD_TRACE_DROPPED_CNTR,
+};
- /* Target SEi and SH0. */
- radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) |
- S_030800_SH_INDEX(0) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
+static void si_copy_sqtt_info_regs(struct si_context *sctx,
+ struct radeon_cmdbuf *cs,
+ unsigned se_index) {
+ const uint32_t *sqtt_info_regs = NULL;
+
+ switch (sctx->gfx_level) {
+ case GFX10_3:
+ case GFX10:
+ sqtt_info_regs = gfx10_sqtt_info_regs;
+ break;
+ case GFX11:
+ sqtt_info_regs = gfx11_sqtt_info_regs;
+ break;
+ case GFX9:
+ sqtt_info_regs = gfx9_sqtt_info_regs;
+ break;
+ default:
+ unreachable("Unsupported gfx_level");
+ }
+
+ /* Get the VA where the info struct is stored for this SE. */
+ uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->sqtt->bo);
+ uint64_t info_va = ac_sqtt_get_info_va(va, se_index);
+
+ radeon_begin(cs);
+
+ /* Copy back the info struct one DWORD at a time. */
+ for (unsigned i = 0; i < 3; i++) {
+ radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
+ COPY_DATA_DST_SEL(COPY_DATA_TC_L2) | COPY_DATA_WR_CONFIRM);
+ radeon_emit(sqtt_info_regs[i] >> 2);
+ radeon_emit(0); /* unused */
+ radeon_emit((info_va + i * 4));
+ radeon_emit((info_va + i * 4) >> 32);
+ }
+
+ if (sctx->gfx_level == GFX11) {
+ /* On GFX11, WPTR is incremented from the offset of the current buffer base
+ * address and it needs to be subtracted to get the correct offset:
+ *
+ * 1) get the current buffer base address for this SE
+ * 2) shift right by 5 bits because SQ_THREAD_TRACE_WPTR is 32-byte aligned
+ * 3) mask off the higher 3 bits because WPTR.OFFSET is 29 bits
+ */
+ uint64_t data_va =
+ ac_sqtt_get_data_va(&sctx->screen->info, sctx->sqtt, va, se_index);
+ uint64_t shifted_data_va = (data_va >> 5);
+ uint64_t init_wptr_value = shifted_data_va & 0x1fffffff;
+
+ radeon_emit(PKT3(PKT3_ATOMIC_MEM, 7, 0));
+ radeon_emit(ATOMIC_OP(TC_OP_ATOMIC_SUB_32));
+ radeon_emit(info_va);
+ radeon_emit(info_va >> 32);
+ radeon_emit(init_wptr_value);
+ radeon_emit(init_wptr_value >> 32);
+ radeon_emit(0);
+ radeon_emit(0);
+ radeon_emit(0);
+ }
+
+ radeon_end();
+}
- if (sctx->gfx_level >= GFX10) {
- uint32_t tt_status_reg = sctx->gfx_level >= GFX11 ? R_0367D0_SQ_THREAD_TRACE_STATUS :
- R_008D20_SQ_THREAD_TRACE_STATUS;
- if (!sctx->screen->info.has_sqtt_rb_harvest_bug) {
- /* Make sure to wait for the trace buffer. */
- radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
- radeon_emit(tt_status_reg >> 2); /* register */
- radeon_emit(0);
- radeon_emit(0); /* reference value */
- radeon_emit(sctx->gfx_level >= GFX11 ? ~C_0367D0_FINISH_DONE : ~C_008D20_FINISH_DONE); /* mask */
- radeon_emit(4); /* poll interval */
- }
-
- /* Disable the thread trace mode. */
- if (sctx->gfx_level >= GFX11)
- radeon_set_uconfig_reg(R_0367B0_SQ_THREAD_TRACE_CTRL, S_008D1C_MODE(0));
- else
- radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL, S_008D1C_MODE(0));
-
- /* Wait for thread trace completion. */
- radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
- radeon_emit(tt_status_reg >> 2); /* register */
- radeon_emit(0);
- radeon_emit(0); /* reference value */
- radeon_emit(sctx->gfx_level >= GFX11 ? ~C_0367D0_BUSY : ~C_008D20_BUSY); /* mask */
- radeon_emit(4); /* poll interval */
- } else {
- /* Disable the thread trace mode. */
- radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE,
- S_030CD8_MODE(0));
-
- /* Wait for thread trace completion. */
- radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
- radeon_emit(R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */
- radeon_emit(0);
- radeon_emit(0); /* reference value */
- radeon_emit(~C_030CE8_BUSY); /* mask */
- radeon_emit(4); /* poll interval */
+static void si_emit_sqtt_stop(struct si_context *sctx, struct radeon_cmdbuf *cs,
+ uint32_t queue_family_index) {
+ unsigned max_se = sctx->screen->info.max_se;
+
+ radeon_begin(cs);
+
+ /* Stop the thread trace with a different event based on the queue. */
+ if (queue_family_index == AMD_IP_COMPUTE) {
+ radeon_set_sh_reg(R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
+ S_00B878_THREAD_TRACE_ENABLE(0));
+ } else {
+ radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
+ }
+
+ radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
+ radeon_end();
+
+ if (sctx->screen->info.has_sqtt_rb_harvest_bug) {
+ /* Some chips with disabled RBs should wait for idle because FINISH_DONE
+ * doesn't work. */
+ sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_FLUSH_AND_INV_DB |
+ SI_CONTEXT_CS_PARTIAL_FLUSH;
+ sctx->emit_cache_flush(sctx, cs);
+ }
+
+ for (unsigned se = 0; se < max_se; se++) {
+ if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
+ continue;
+
+ radeon_begin(cs);
+
+ /* Target SEi and SH0. */
+ radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ if (sctx->gfx_level >= GFX10) {
+ uint32_t tt_status_reg = sctx->gfx_level >= GFX11
+ ? R_0367D0_SQ_THREAD_TRACE_STATUS
+ : R_008D20_SQ_THREAD_TRACE_STATUS;
+ if (!sctx->screen->info.has_sqtt_rb_harvest_bug) {
+ /* Make sure to wait for the trace buffer. */
+ radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal
+ to the reference value */
+ radeon_emit(tt_status_reg >> 2); /* register */
+ radeon_emit(0);
+ radeon_emit(0); /* reference value */
+ radeon_emit(sctx->gfx_level >= GFX11
+ ? ~C_0367D0_FINISH_DONE
+ : ~C_008D20_FINISH_DONE); /* mask */
+ radeon_emit(4); /* poll interval */
}
- radeon_end();
- si_copy_thread_trace_info_regs(sctx, cs, se);
- }
-
- /* Restore global broadcasting. */
- radeon_begin_again(cs);
- radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) |
+ /* Disable the thread trace mode. */
+ if (sctx->gfx_level >= GFX11)
+ radeon_set_uconfig_reg(R_0367B0_SQ_THREAD_TRACE_CTRL, S_008D1C_MODE(0));
+ else
+ radeon_set_privileged_config_reg(R_008D1C_SQ_THREAD_TRACE_CTRL,
+ S_008D1C_MODE(0));
+
+ /* Wait for thread trace completion. */
+ radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to
+ the reference value */
+ radeon_emit(tt_status_reg >> 2); /* register */
+ radeon_emit(0);
+ radeon_emit(0); /* reference value */
+ radeon_emit(sctx->gfx_level >= GFX11 ? ~C_0367D0_BUSY
+ : ~C_008D20_BUSY); /* mask */
+ radeon_emit(4); /* poll interval */
+ } else {
+ /* Disable the thread trace mode. */
+ radeon_set_uconfig_reg(R_030CD8_SQ_THREAD_TRACE_MODE, S_030CD8_MODE(0));
+
+ /* Wait for thread trace completion. */
+ radeon_emit(PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(WAIT_REG_MEM_EQUAL); /* wait until the register is equal to
+ the reference value */
+ radeon_emit(R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */
+ radeon_emit(0);
+ radeon_emit(0); /* reference value */
+ radeon_emit(~C_030CE8_BUSY); /* mask */
+ radeon_emit(4); /* poll interval */
+ }
+ radeon_end();
+
+ si_copy_sqtt_info_regs(sctx, cs, se);
+ }
+
+ /* Restore global broadcasting. */
+ radeon_begin_again(cs);
+ radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) |
S_030800_SH_BROADCAST_WRITES(1) |
S_030800_INSTANCE_BROADCAST_WRITES(1));
- radeon_end();
+ radeon_end();
}
-static void
-si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
-{
- struct radeon_winsys *ws = sctx->ws;
-
- radeon_begin(cs);
-
- switch (family) {
- case AMD_IP_GFX:
- radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
- radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
- break;
- case AMD_IP_COMPUTE:
- radeon_emit(PKT3(PKT3_NOP, 0, 0));
- radeon_emit(0);
- break;
- }
- radeon_end();
-
- ws->cs_add_buffer(cs,
- sctx->thread_trace->bo,
- RADEON_USAGE_READWRITE,
- RADEON_DOMAIN_VRAM);
- if (sctx->spm.bo)
- ws->cs_add_buffer(cs,
- sctx->spm.bo,
- RADEON_USAGE_READWRITE,
- RADEON_DOMAIN_VRAM);
-
- si_cp_dma_wait_for_idle(sctx, cs);
-
- /* Make sure to wait-for-idle before starting SQTT. */
- sctx->flags |=
- SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
- SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
- SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME;
- sctx->emit_cache_flush(sctx, cs);
-
- si_inhibit_clockgating(sctx, cs, true);
-
- /* Enable SQG events that collects thread trace data. */
- si_emit_spi_config_cntl(sctx, cs, true);
-
- if (sctx->spm.bo) {
- si_pc_emit_spm_reset(cs);
- si_pc_emit_shaders(cs, 0x7f);
- si_emit_spm_setup(sctx, cs);
- }
-
- si_emit_thread_trace_start(sctx, cs, family);
-
- if (sctx->spm.bo)
- si_pc_emit_spm_start(cs);
+static void si_sqtt_start(struct si_context *sctx, int family,
+ struct radeon_cmdbuf *cs) {
+ struct radeon_winsys *ws = sctx->ws;
+
+ radeon_begin(cs);
+
+ switch (family) {
+ case AMD_IP_GFX:
+ radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
+ radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
+ break;
+ case AMD_IP_COMPUTE:
+ radeon_emit(PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(0);
+ break;
+ }
+ radeon_end();
+
+ ws->cs_add_buffer(cs, sctx->sqtt->bo, RADEON_USAGE_READWRITE,
+ RADEON_DOMAIN_VRAM);
+ if (sctx->spm.bo)
+ ws->cs_add_buffer(cs, sctx->spm.bo, RADEON_USAGE_READWRITE,
+ RADEON_DOMAIN_VRAM);
+
+ si_cp_dma_wait_for_idle(sctx, cs);
+
+ /* Make sure to wait-for-idle before starting SQTT. */
+ sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
+ SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE |
+ SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 |
+ SI_CONTEXT_PFP_SYNC_ME;
+ sctx->emit_cache_flush(sctx, cs);
+
+ si_inhibit_clockgating(sctx, cs, true);
+
+  /* Enable SQG events that collect thread trace data. */
+ si_emit_spi_config_cntl(sctx, cs, true);
+
+ if (sctx->spm.bo) {
+ si_pc_emit_spm_reset(cs);
+ si_pc_emit_shaders(cs, 0x7f);
+ si_emit_spm_setup(sctx, cs);
+ }
+
+ si_emit_sqtt_start(sctx, cs, family);
+
+ if (sctx->spm.bo)
+ si_pc_emit_spm_start(cs);
}
-static void
-si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *cs)
-{
- struct radeon_winsys *ws = sctx->ws;
+static void si_sqtt_stop(struct si_context *sctx, int family,
+ struct radeon_cmdbuf *cs) {
+ struct radeon_winsys *ws = sctx->ws;
- radeon_begin(cs);
+ radeon_begin(cs);
- switch (family) {
- case AMD_IP_GFX:
- radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
- radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
- break;
- case AMD_IP_COMPUTE:
- radeon_emit(PKT3(PKT3_NOP, 0, 0));
- radeon_emit(0);
- break;
- }
- radeon_end();
+ switch (family) {
+ case AMD_IP_GFX:
+ radeon_emit(PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(CC0_UPDATE_LOAD_ENABLES(1));
+ radeon_emit(CC1_UPDATE_SHADOW_ENABLES(1));
+ break;
+ case AMD_IP_COMPUTE:
+ radeon_emit(PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(0);
+ break;
+ }
+ radeon_end();
- ws->cs_add_buffer(cs,
- sctx->thread_trace->bo,
- RADEON_USAGE_READWRITE,
- RADEON_DOMAIN_VRAM);
+ ws->cs_add_buffer(cs, sctx->sqtt->bo, RADEON_USAGE_READWRITE,
+ RADEON_DOMAIN_VRAM);
- if (sctx->spm.bo)
- ws->cs_add_buffer(cs,
- sctx->spm.bo,
- RADEON_USAGE_READWRITE,
- RADEON_DOMAIN_VRAM);
+ if (sctx->spm.bo)
+ ws->cs_add_buffer(cs, sctx->spm.bo, RADEON_USAGE_READWRITE,
+ RADEON_DOMAIN_VRAM);
- si_cp_dma_wait_for_idle(sctx, cs);
+ si_cp_dma_wait_for_idle(sctx, cs);
- if (sctx->spm.bo)
- si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters,
- sctx->screen->info.never_send_perfcounter_stop);
+ if (sctx->spm.bo)
+ si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters,
+ sctx->screen->info.never_send_perfcounter_stop);
- /* Make sure to wait-for-idle before stopping SQTT. */
- sctx->flags |=
- SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
- SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE |
- SI_CONTEXT_INV_L2 | SI_CONTEXT_PFP_SYNC_ME;
- sctx->emit_cache_flush(sctx, cs);
+ /* Make sure to wait-for-idle before stopping SQTT. */
+ sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
+ SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_SCACHE |
+ SI_CONTEXT_INV_VCACHE | SI_CONTEXT_INV_L2 |
+ SI_CONTEXT_PFP_SYNC_ME;
+ sctx->emit_cache_flush(sctx, cs);
- si_emit_thread_trace_stop(sctx, cs, family);
+ si_emit_sqtt_stop(sctx, cs, family);
- if (sctx->spm.bo)
- si_pc_emit_spm_reset(cs);
+ if (sctx->spm.bo)
+ si_pc_emit_spm_reset(cs);
- /* Restore previous state by disabling SQG events. */
- si_emit_spi_config_cntl(sctx, cs, false);
+ /* Restore previous state by disabling SQG events. */
+ si_emit_spi_config_cntl(sctx, cs, false);
- si_inhibit_clockgating(sctx, cs, false);
+ si_inhibit_clockgating(sctx, cs, false);
}
-
-static void
-si_thread_trace_init_cs(struct si_context *sctx)
-{
- struct radeon_winsys *ws = sctx->ws;
-
- /* Thread trace start CS (only handles AMD_IP_GFX). */
- sctx->thread_trace->start_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
- if (!ws->cs_create(sctx->thread_trace->start_cs[AMD_IP_GFX],
- sctx->ctx, AMD_IP_GFX, NULL, NULL, 0)) {
- free(sctx->thread_trace->start_cs[AMD_IP_GFX]);
- sctx->thread_trace->start_cs[AMD_IP_GFX] = NULL;
- return;
- }
-
- si_thread_trace_start(sctx, AMD_IP_GFX, sctx->thread_trace->start_cs[AMD_IP_GFX]);
-
- /* Thread trace stop CS. */
- sctx->thread_trace->stop_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
- if (!ws->cs_create(sctx->thread_trace->stop_cs[AMD_IP_GFX],
- sctx->ctx, AMD_IP_GFX, NULL, NULL, 0)) {
- free(sctx->thread_trace->start_cs[AMD_IP_GFX]);
- sctx->thread_trace->start_cs[AMD_IP_GFX] = NULL;
- free(sctx->thread_trace->stop_cs[AMD_IP_GFX]);
- sctx->thread_trace->stop_cs[AMD_IP_GFX] = NULL;
- return;
- }
-
- si_thread_trace_stop(sctx, AMD_IP_GFX, sctx->thread_trace->stop_cs[AMD_IP_GFX]);
+static void si_sqtt_init_cs(struct si_context *sctx) {
+ struct radeon_winsys *ws = sctx->ws;
+
+ /* Thread trace start CS (only handles AMD_IP_GFX). */
+ sctx->sqtt->start_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
+ if (!ws->cs_create(sctx->sqtt->start_cs[AMD_IP_GFX], sctx->ctx, AMD_IP_GFX,
+ NULL, NULL, 0)) {
+ free(sctx->sqtt->start_cs[AMD_IP_GFX]);
+ sctx->sqtt->start_cs[AMD_IP_GFX] = NULL;
+ return;
+ }
+
+ si_sqtt_start(sctx, AMD_IP_GFX, sctx->sqtt->start_cs[AMD_IP_GFX]);
+
+ /* Thread trace stop CS. */
+ sctx->sqtt->stop_cs[AMD_IP_GFX] = CALLOC_STRUCT(radeon_cmdbuf);
+ if (!ws->cs_create(sctx->sqtt->stop_cs[AMD_IP_GFX], sctx->ctx, AMD_IP_GFX,
+ NULL, NULL, 0)) {
+ free(sctx->sqtt->start_cs[AMD_IP_GFX]);
+ sctx->sqtt->start_cs[AMD_IP_GFX] = NULL;
+ free(sctx->sqtt->stop_cs[AMD_IP_GFX]);
+ sctx->sqtt->stop_cs[AMD_IP_GFX] = NULL;
+ return;
+ }
+
+ si_sqtt_stop(sctx, AMD_IP_GFX, sctx->sqtt->stop_cs[AMD_IP_GFX]);
}
-static void
-si_begin_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
-{
- struct radeon_cmdbuf *cs = sctx->thread_trace->start_cs[AMD_IP_GFX];
- sctx->ws->cs_flush(cs, 0, NULL);
+static void si_begin_sqtt(struct si_context *sctx, struct radeon_cmdbuf *rcs) {
+ struct radeon_cmdbuf *cs = sctx->sqtt->start_cs[AMD_IP_GFX];
+ sctx->ws->cs_flush(cs, 0, NULL);
}
-static void
-si_end_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
-{
- struct radeon_cmdbuf *cs = sctx->thread_trace->stop_cs[AMD_IP_GFX];
- sctx->ws->cs_flush(cs, 0, &sctx->last_sqtt_fence);
+static void si_end_sqtt(struct si_context *sctx, struct radeon_cmdbuf *rcs) {
+ struct radeon_cmdbuf *cs = sctx->sqtt->stop_cs[AMD_IP_GFX];
+ sctx->ws->cs_flush(cs, 0, &sctx->last_sqtt_fence);
}
-static bool
-si_get_thread_trace(struct si_context *sctx,
- struct ac_thread_trace *thread_trace)
-{
- unsigned max_se = sctx->screen->info.max_se;
+static bool si_get_sqtt_trace(struct si_context *sctx,
+ struct ac_sqtt_trace *sqtt) {
+ unsigned max_se = sctx->screen->info.max_se;
- memset(thread_trace, 0, sizeof(*thread_trace));
+ memset(sqtt, 0, sizeof(*sqtt));
- sctx->thread_trace->ptr = sctx->ws->buffer_map(sctx->ws, sctx->thread_trace->bo,
- NULL,
- PIPE_MAP_READ);
+ sctx->sqtt->ptr =
+ sctx->ws->buffer_map(sctx->ws, sctx->sqtt->bo, NULL, PIPE_MAP_READ);
- if (!sctx->thread_trace->ptr)
- return false;
+ if (!sctx->sqtt->ptr)
+ return false;
- if (!ac_sqtt_get_trace(sctx->thread_trace, &sctx->screen->info,
- thread_trace)) {
- void *thread_trace_ptr = sctx->thread_trace->ptr;
+ if (!ac_sqtt_get_trace(sctx->sqtt, &sctx->screen->info, sqtt)) {
+ void *sqtt_ptr = sctx->sqtt->ptr;
- for (unsigned se = 0; se < max_se; se++) {
- uint64_t info_offset = ac_thread_trace_get_info_offset(se);
- void *info_ptr = thread_trace_ptr + info_offset;
- struct ac_thread_trace_info *info =
- (struct ac_thread_trace_info *)info_ptr;
+ for (unsigned se = 0; se < max_se; se++) {
+ uint64_t info_offset = ac_sqtt_get_info_offset(se);
+ void *info_ptr = sqtt_ptr + info_offset;
+ struct ac_sqtt_data_info *info = (struct ac_sqtt_data_info *)info_ptr;
- if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
- continue;
-
- if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
- uint32_t expected_size =
- ac_get_expected_buffer_size(&sctx->screen->info, info);
- uint32_t available_size = (info->cur_offset * 32) / 1024;
-
- fprintf(stderr, "Failed to get the thread trace "
- "because the buffer is too small. The "
- "hardware needs %d KB but the "
- "buffer size is %d KB.\n",
- expected_size, available_size);
- fprintf(stderr, "Please update the buffer size with "
- "AMD_THREAD_TRACE_BUFFER_SIZE=<size_in_kbytes>\n");
- return false;
- }
+ if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
+ continue;
+
+ if (!ac_is_sqtt_complete(&sctx->screen->info, sctx->sqtt, info)) {
+ uint32_t expected_size =
+ ac_get_expected_buffer_size(&sctx->screen->info, info);
+ uint32_t available_size = (info->cur_offset * 32) / 1024;
+
+ fprintf(stderr,
+ "Failed to get the thread trace "
+ "because the buffer is too small. The "
+ "hardware needs %d KB but the "
+ "buffer size is %d KB.\n",
+ expected_size, available_size);
+ fprintf(stderr, "Please update the buffer size with "
+ "AMD_THREAD_TRACE_BUFFER_SIZE=<size_in_kbytes>\n");
+ return false;
}
- }
+ }
+ }
- return true;
+ return true;
}
-
-bool
-si_init_thread_trace(struct si_context *sctx)
-{
- static bool warn_once = true;
- if (warn_once) {
- fprintf(stderr, "*************************************************\n");
- fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
- fprintf(stderr, "*************************************************\n");
- warn_once = false;
- }
-
- sctx->thread_trace = CALLOC_STRUCT(ac_thread_trace_data);
-
- if (sctx->gfx_level < GFX8) {
- fprintf(stderr, "GPU hardware not supported: refer to "
- "the RGP documentation for the list of "
- "supported GPUs!\n");
- return false;
- }
-
- if (sctx->gfx_level > GFX11) {
- fprintf(stderr, "radeonsi: Thread trace is not supported "
- "for that GPU!\n");
- return false;
- }
-
- /* Default buffer size set to 32MB per SE. */
- sctx->thread_trace->buffer_size = debug_get_num_option("AMD_THREAD_TRACE_BUFFER_SIZE", 32 * 1024) * 1024;
- sctx->thread_trace->start_frame = 10;
-
- const char *trigger = getenv("AMD_THREAD_TRACE_TRIGGER");
- if (trigger) {
- sctx->thread_trace->start_frame = atoi(trigger);
- if (sctx->thread_trace->start_frame <= 0) {
- /* This isn't a frame number, must be a file */
- sctx->thread_trace->trigger_file = strdup(trigger);
- sctx->thread_trace->start_frame = -1;
- }
- }
-
- if (!si_thread_trace_init_bo(sctx))
- return false;
-
- ac_thread_trace_init(sctx->thread_trace);
-
- if (sctx->gfx_level >= GFX10 &&
- debug_get_bool_option("AMD_THREAD_TRACE_SPM", sctx->gfx_level < GFX11)) {
- /* Limit SPM counters to GFX10 and GFX10_3 for now */
- ASSERTED bool r = si_spm_init(sctx);
- assert(r);
- }
-
- si_thread_trace_init_cs(sctx);
-
- sctx->sqtt_next_event = EventInvalid;
-
- return true;
+bool si_init_sqtt(struct si_context *sctx) {
+ static bool warn_once = true;
+ if (warn_once) {
+ fprintf(stderr, "*************************************************\n");
+ fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
+ fprintf(stderr, "*************************************************\n");
+ warn_once = false;
+ }
+
+ sctx->sqtt = CALLOC_STRUCT(ac_sqtt);
+
+ if (sctx->gfx_level < GFX8) {
+ fprintf(stderr, "GPU hardware not supported: refer to "
+ "the RGP documentation for the list of "
+ "supported GPUs!\n");
+ return false;
+ }
+
+ if (sctx->gfx_level > GFX11) {
+ fprintf(stderr, "radeonsi: Thread trace is not supported "
+ "for that GPU!\n");
+ return false;
+ }
+
+ /* Default buffer size set to 32MB per SE. */
+ sctx->sqtt->buffer_size =
+ debug_get_num_option("AMD_THREAD_TRACE_BUFFER_SIZE", 32 * 1024) * 1024;
+ sctx->sqtt->start_frame = 10;
+
+ const char *trigger = getenv("AMD_THREAD_TRACE_TRIGGER");
+ if (trigger) {
+ sctx->sqtt->start_frame = atoi(trigger);
+ if (sctx->sqtt->start_frame <= 0) {
+ /* This isn't a frame number, must be a file */
+ sctx->sqtt->trigger_file = strdup(trigger);
+ sctx->sqtt->start_frame = -1;
+ }
+ }
+
+ if (!si_sqtt_init_bo(sctx))
+ return false;
+
+ ac_sqtt_init(sctx->sqtt);
+
+ if (sctx->gfx_level >= GFX10 &&
+ debug_get_bool_option("AMD_THREAD_TRACE_SPM", sctx->gfx_level < GFX11)) {
+ /* Limit SPM counters to GFX10 and GFX10_3 for now */
+ ASSERTED bool r = si_spm_init(sctx);
+ assert(r);
+ }
+
+ si_sqtt_init_cs(sctx);
+
+ sctx->sqtt_next_event = EventInvalid;
+
+ return true;
}
-void
-si_destroy_thread_trace(struct si_context *sctx)
-{
- struct si_screen *sscreen = sctx->screen;
- struct pb_buffer *bo = sctx->thread_trace->bo;
- radeon_bo_reference(sctx->screen->ws, &bo, NULL);
-
- if (sctx->thread_trace->trigger_file)
- free(sctx->thread_trace->trigger_file);
-
- sscreen->ws->cs_destroy(sctx->thread_trace->start_cs[AMD_IP_GFX]);
- sscreen->ws->cs_destroy(sctx->thread_trace->stop_cs[AMD_IP_GFX]);
-
- struct rgp_pso_correlation *pso_correlation = &sctx->thread_trace->rgp_pso_correlation;
- struct rgp_loader_events *loader_events = &sctx->thread_trace->rgp_loader_events;
- struct rgp_code_object *code_object = &sctx->thread_trace->rgp_code_object;
- list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
- &pso_correlation->record, list) {
- list_del(&record->list);
- free(record);
- }
-
- list_for_each_entry_safe(struct rgp_loader_events_record, record,
- &loader_events->record, list) {
- list_del(&record->list);
- free(record);
- }
-
- list_for_each_entry_safe(struct rgp_code_object_record, record,
- &code_object->record, list) {
- uint32_t mask = record->shader_stages_mask;
- int i;
-
- /* Free the disassembly. */
- while (mask) {
- i = u_bit_scan(&mask);
- free(record->shader_data[i].code);
- }
- list_del(&record->list);
- free(record);
- }
-
- ac_thread_trace_finish(sctx->thread_trace);
-
- hash_table_foreach(sctx->thread_trace->pipeline_bos->table, entry) {
- struct si_sqtt_fake_pipeline *pipeline = (struct si_sqtt_fake_pipeline *)entry->data;
- si_resource_reference(&pipeline->bo, NULL);
- FREE(pipeline);
- }
-
- free(sctx->thread_trace);
- sctx->thread_trace = NULL;
-
- if (sctx->spm.bo)
- si_spm_finish(sctx);
+void si_destroy_sqtt(struct si_context *sctx) {
+ struct si_screen *sscreen = sctx->screen;
+ struct pb_buffer *bo = sctx->sqtt->bo;
+ radeon_bo_reference(sctx->screen->ws, &bo, NULL);
+
+ if (sctx->sqtt->trigger_file)
+ free(sctx->sqtt->trigger_file);
+
+ sscreen->ws->cs_destroy(sctx->sqtt->start_cs[AMD_IP_GFX]);
+ sscreen->ws->cs_destroy(sctx->sqtt->stop_cs[AMD_IP_GFX]);
+
+ struct rgp_pso_correlation *pso_correlation =
+ &sctx->sqtt->rgp_pso_correlation;
+ struct rgp_loader_events *loader_events = &sctx->sqtt->rgp_loader_events;
+ struct rgp_code_object *code_object = &sctx->sqtt->rgp_code_object;
+ list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
+ &pso_correlation->record, list) {
+ list_del(&record->list);
+ free(record);
+ }
+
+ list_for_each_entry_safe(struct rgp_loader_events_record, record,
+ &loader_events->record, list) {
+ list_del(&record->list);
+ free(record);
+ }
+
+ list_for_each_entry_safe(struct rgp_code_object_record, record,
+ &code_object->record, list) {
+ uint32_t mask = record->shader_stages_mask;
+ int i;
+
+ /* Free the disassembly. */
+ while (mask) {
+ i = u_bit_scan(&mask);
+ free(record->shader_data[i].code);
+ }
+ list_del(&record->list);
+ free(record);
+ }
+
+ ac_sqtt_finish(sctx->sqtt);
+
+ hash_table_foreach(sctx->sqtt->pipeline_bos->table, entry) {
+ struct si_sqtt_fake_pipeline *pipeline =
+ (struct si_sqtt_fake_pipeline *)entry->data;
+ si_resource_reference(&pipeline->bo, NULL);
+ FREE(pipeline);
+ }
+
+ free(sctx->sqtt);
+ sctx->sqtt = NULL;
+
+ if (sctx->spm.bo)
+ si_spm_finish(sctx);
}
static uint64_t num_frames = 0;
-void
-si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
-{
- /* Should we enable SQTT yet? */
- if (!sctx->thread_trace_enabled) {
- bool frame_trigger = num_frames == sctx->thread_trace->start_frame;
- bool file_trigger = false;
- if (sctx->thread_trace->trigger_file &&
- access(sctx->thread_trace->trigger_file, W_OK) == 0) {
- if (unlink(sctx->thread_trace->trigger_file) == 0) {
- file_trigger = true;
- } else {
- /* Do not enable tracing if we cannot remove the file,
- * because by then we'll trace every frame.
- */
- fprintf(stderr, "radeonsi: could not remove thread trace trigger file, ignoring\n");
- }
+void si_handle_sqtt(struct si_context *sctx, struct radeon_cmdbuf *rcs) {
+ /* Should we enable SQTT yet? */
+ if (!sctx->sqtt_enabled) {
+ bool frame_trigger = num_frames == sctx->sqtt->start_frame;
+ bool file_trigger = false;
+ if (sctx->sqtt->trigger_file &&
+ access(sctx->sqtt->trigger_file, W_OK) == 0) {
+ if (unlink(sctx->sqtt->trigger_file) == 0) {
+ file_trigger = true;
+ } else {
+ /* Do not enable tracing if we cannot remove the file,
+ * because by then we'll trace every frame.
+ */
+ fprintf(
+ stderr,
+ "radeonsi: could not remove thread trace trigger file, ignoring\n");
}
+ }
- if (frame_trigger || file_trigger) {
- /* Wait for last submission */
- sctx->ws->fence_wait(sctx->ws, sctx->last_gfx_fence, PIPE_TIMEOUT_INFINITE);
+ if (frame_trigger || file_trigger) {
+ /* Wait for last submission */
+ sctx->ws->fence_wait(sctx->ws, sctx->last_gfx_fence,
+ PIPE_TIMEOUT_INFINITE);
- /* Start SQTT */
- si_begin_thread_trace(sctx, rcs);
+ /* Start SQTT */
+ si_begin_sqtt(sctx, rcs);
- sctx->thread_trace_enabled = true;
- sctx->thread_trace->start_frame = -1;
+ sctx->sqtt_enabled = true;
+ sctx->sqtt->start_frame = -1;
- /* Force shader update to make sure si_sqtt_describe_pipeline_bind is called
- * for the current "pipeline".
- */
- sctx->do_update_shaders = true;
- }
- } else {
- struct ac_thread_trace thread_trace = {0};
-
- /* Stop SQTT */
- si_end_thread_trace(sctx, rcs);
- sctx->thread_trace_enabled = false;
- sctx->thread_trace->start_frame = -1;
- assert (sctx->last_sqtt_fence);
-
- /* Wait for SQTT to finish and read back the bo */
- if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence, PIPE_TIMEOUT_INFINITE) &&
- si_get_thread_trace(sctx, &thread_trace)) {
- struct ac_spm_trace spm_trace;
-
- /* Map the SPM counter buffer */
- if (sctx->spm.bo) {
- sctx->spm.ptr = sctx->ws->buffer_map(sctx->ws, sctx->spm.bo,
- NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY);
- ac_spm_get_trace(&sctx->spm, &spm_trace);
- }
-
- ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, sctx->spm.bo ? &spm_trace : NULL);
-
- if (sctx->spm.ptr)
- sctx->ws->buffer_unmap(sctx->ws, sctx->spm.bo);
- } else {
- fprintf(stderr, "Failed to read the trace\n");
+ /* Force shader update to make sure si_sqtt_describe_pipeline_bind is
+ * called for the current "pipeline".
+ */
+ sctx->do_update_shaders = true;
+ }
+ } else {
+ struct ac_sqtt_trace sqtt_trace = {0};
+
+ /* Stop SQTT */
+ si_end_sqtt(sctx, rcs);
+ sctx->sqtt_enabled = false;
+ sctx->sqtt->start_frame = -1;
+ assert(sctx->last_sqtt_fence);
+
+ /* Wait for SQTT to finish and read back the bo */
+ if (sctx->ws->fence_wait(sctx->ws, sctx->last_sqtt_fence,
+ PIPE_TIMEOUT_INFINITE) &&
+ si_get_sqtt_trace(sctx, &sqtt_trace)) {
+ struct ac_spm_trace spm_trace;
+
+ /* Map the SPM counter buffer */
+ if (sctx->spm.bo) {
+ sctx->spm.ptr = sctx->ws->buffer_map(
+ sctx->ws, sctx->spm.bo, NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY);
+ ac_spm_get_trace(&sctx->spm, &spm_trace);
}
- }
- num_frames++;
-}
+ ac_dump_rgp_capture(&sctx->screen->info, &sqtt_trace,
+ sctx->spm.bo ? &spm_trace : NULL);
+ if (sctx->spm.ptr)
+ sctx->ws->buffer_unmap(sctx->ws, sctx->spm.bo);
+ } else {
+ fprintf(stderr, "Failed to read the trace\n");
+ }
+ }
-static void
-si_emit_thread_trace_userdata(struct si_context* sctx,
- struct radeon_cmdbuf *cs,
- const void *data, uint32_t num_dwords)
-{
- const uint32_t *dwords = (uint32_t *)data;
+ num_frames++;
+}
- radeon_begin(cs);
+static void si_emit_sqtt_userdata(struct si_context *sctx,
+ struct radeon_cmdbuf *cs, const void *data,
+ uint32_t num_dwords) {
+ const uint32_t *dwords = (uint32_t *)data;
- while (num_dwords > 0) {
- uint32_t count = MIN2(num_dwords, 2);
+ radeon_begin(cs);
- /* Without the perfctr bit the CP might not always pass the
- * write on correctly. */
- radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count, sctx->gfx_level >= GFX10);
+ while (num_dwords > 0) {
+ uint32_t count = MIN2(num_dwords, 2);
- radeon_emit_array(dwords, count);
+ /* Without the perfctr bit the CP might not always pass the
+ * write on correctly. */
+ radeon_set_uconfig_reg_seq(R_030D08_SQ_THREAD_TRACE_USERDATA_2, count,
+ sctx->gfx_level >= GFX10);
- dwords += count;
- num_dwords -= count;
- }
- radeon_end();
+ radeon_emit_array(dwords, count);
+
+ dwords += count;
+ num_dwords -= count;
+ }
+ radeon_end();
}
static void
marker.instance_offset_reg_idx = instance_offset_user_data;
marker.draw_index_reg_idx = draw_index_user_data;
- si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
+ si_emit_sqtt_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
sctx->sqtt_next_event = EventInvalid;
}
marker.thread_y = y;
marker.thread_z = z;
- si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
+ si_emit_sqtt_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
sctx->sqtt_next_event = EventInvalid;
}
marker.cb_id = 0;
marker.dword02 = 0xC0000000 + 10; /* RGP_BARRIER_INTERNAL_BASE */
- si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
+ si_emit_sqtt_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
}
void
marker.flush_db = true;
}
- si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
+ si_emit_sqtt_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
}
void
marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
marker.data_type = type;
- si_emit_thread_trace_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
+ si_emit_sqtt_userdata(sctx, rcs, &marker, sizeof(marker) / 4);
} else {
assert (str != NULL);
struct rgp_sqtt_marker_user_event_with_length marker = { 0 };
memcpy(buffer + sizeof(marker), str, len);
buffer[sizeof(marker) + len - 1] = '\0';
- si_emit_thread_trace_userdata(sctx, rcs, buffer, sizeof(marker) / 4 + marker.length / 4);
+ si_emit_sqtt_userdata(sctx, rcs, buffer,
+ sizeof(marker) / 4 + marker.length / 4);
}
}
-
-bool
-si_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data,
- uint64_t pipeline_hash)
-{
- simple_mtx_lock(&thread_trace_data->rgp_pso_correlation.lock);
+bool si_sqtt_pipeline_is_registered(struct ac_sqtt *sqtt,
+ uint64_t pipeline_hash) {
+ simple_mtx_lock(&sqtt->rgp_pso_correlation.lock);
list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
- &thread_trace_data->rgp_pso_correlation.record, list) {
+ &sqtt->rgp_pso_correlation.record, list) {
if (record->pipeline_hash[0] == pipeline_hash) {
- simple_mtx_unlock(&thread_trace_data->rgp_pso_correlation.lock);
+ simple_mtx_unlock(&sqtt->rgp_pso_correlation.lock);
return true;
}
-
}
- simple_mtx_unlock(&thread_trace_data->rgp_pso_correlation.lock);
+ simple_mtx_unlock(&sqtt->rgp_pso_correlation.lock);
return false;
}
-
-
static enum rgp_hardware_stages
si_sqtt_pipe_to_rgp_shader_stage(union si_shader_key* key, enum pipe_shader_type stage)
{
struct si_sqtt_fake_pipeline *pipeline,
bool is_compute)
{
- struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
- struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
+ struct rgp_code_object *code_object = &sctx->sqtt->rgp_code_object;
struct rgp_code_object_record *record;
record = malloc(sizeof(struct rgp_code_object_record));
bool
si_sqtt_register_pipeline(struct si_context* sctx, struct si_sqtt_fake_pipeline *pipeline, bool is_compute)
{
- struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
-
- assert (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline->code_hash));
+ assert(!si_sqtt_pipeline_is_registered(sctx->sqtt, pipeline->code_hash));
- bool result = ac_sqtt_add_pso_correlation(thread_trace_data, pipeline->code_hash);
+ bool result = ac_sqtt_add_pso_correlation(sctx->sqtt, pipeline->code_hash);
if (!result)
return false;
- result = ac_sqtt_add_code_object_loader_event(thread_trace_data, pipeline->code_hash, pipeline->bo->gpu_address);
+ result = ac_sqtt_add_code_object_loader_event(
+ sctx->sqtt, pipeline->code_hash, pipeline->bo->gpu_address);
if (!result)
return false;
struct rgp_sqtt_marker_pipeline_bind marker = {0};
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
- if (likely(!sctx->thread_trace_enabled)) {
+ if (likely(!sctx->sqtt_enabled)) {
return;
}
marker.api_pso_hash[0] = pipeline_hash;
marker.api_pso_hash[1] = pipeline_hash >> 32;
- si_emit_thread_trace_userdata(sctx, cs, &marker, sizeof(marker) / 4);
+ si_emit_sqtt_userdata(sctx, cs, &marker, sizeof(marker) / 4);
}
si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs);
}
- if (GFX_VERSION >= GFX9 && unlikely(sctx->thread_trace)) {
+ if (GFX_VERSION >= GFX9 && unlikely(sctx->sqtt)) {
/* Pretend the bound shaders form a vk pipeline. Include the scratch size in
* the hash calculation to force re-emitting the pipeline if the scratch bo
* changes.
}
struct si_sqtt_fake_pipeline *pipeline = NULL;
- struct ac_thread_trace_data *thread_trace_data = sctx->thread_trace;
- if (!si_sqtt_pipeline_is_registered(thread_trace_data, pipeline_code_hash)) {
+ if (!si_sqtt_pipeline_is_registered(sctx->sqtt, pipeline_code_hash)) {
/* This is a new pipeline. Allocate a new bo to hold all the shaders. Without
* this, shader code export process creates huge rgp files because RGP assumes
* the shaders live sequentially in memory (shader N address = shader 0 + offset N)
}
sctx->screen->ws->buffer_unmap(sctx->screen->ws, bo->buf);
- _mesa_hash_table_u64_insert(sctx->thread_trace->pipeline_bos,
+ _mesa_hash_table_u64_insert(sctx->sqtt->pipeline_bos,
pipeline_code_hash, pipeline);
si_sqtt_register_pipeline(sctx, pipeline, false);
si_resource_reference(&bo, NULL);
}
} else {
- pipeline = (struct si_sqtt_fake_pipeline *)
- _mesa_hash_table_u64_search(sctx->thread_trace->pipeline_bos, pipeline_code_hash);
+ pipeline = (struct si_sqtt_fake_pipeline *)_mesa_hash_table_u64_search(
+ sctx->sqtt->pipeline_bos, pipeline_code_hash);
}
assert(pipeline);
radeon_end();
}
-#define EMIT_SQTT_END_DRAW do { \
- if (GFX_VERSION >= GFX9 && unlikely(sctx->thread_trace_enabled)) { \
- radeon_begin(&sctx->gfx_cs); \
- radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); \
- radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | \
- EVENT_INDEX(0)); \
- radeon_end(); \
- } \
- } while (0)
+#define EMIT_SQTT_END_DRAW \
+ do { \
+ if (GFX_VERSION >= GFX9 && unlikely(sctx->sqtt_enabled)) { \
+ radeon_begin(&sctx->gfx_cs); \
+ radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); \
+ radeon_emit(EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0)); \
+ radeon_end(); \
+ } \
+ } while (0)
template <amd_gfx_level GFX_VERSION, si_has_ngg NGG, si_is_draw_vertex_state IS_DRAW_VERTEX_STATE>
ALWAYS_INLINE
{
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
- if (unlikely(sctx->thread_trace_enabled)) {
+ if (unlikely(sctx->sqtt_enabled)) {
si_sqtt_write_event_marker(sctx, &sctx->gfx_cs, sctx->sqtt_next_event,
UINT_MAX, UINT_MAX, UINT_MAX);
}