#include "util/u_async_debug.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
+#include "si_tracepoints.h"
#define COMPUTE_DBG(sscreen, fmt, args...) \
do { \
NULL);
}
}
-
+
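+ /* Record the start of the compute dispatch when a perfetto session is active. */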
+ if (u_trace_perfetto_active(&sctx->ds.trace_context))
+ trace_si_begin_compute(&sctx->trace);
+
if (sctx->bo_list_add_all_compute_resources)
si_compute_resources_add_all_to_bo_list(sctx);
sctx->compute_is_busy = true;
sctx->num_compute_calls++;
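+ /* Close the compute event, attaching the grid dimensions as payload. */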
+ if (u_trace_perfetto_active(&sctx->ds.trace_context))
+ trace_si_end_compute(&sctx->trace, info->grid[0], info->grid[1], info->grid[2]);
+
if (cs_regalloc_hang) {
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
si_mark_atom_dirty(sctx, &sctx->atoms.s.cache_flush);
if (unlikely(sctx->sqtt && (flags & PIPE_FLUSH_END_OF_FRAME))) {
si_handle_sqtt(sctx, &sctx->gfx_cs);
}
+
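+ /* Let u_trace process completed timestamp batches; the flag marks end-of-frame. */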
+ if (u_trace_perfetto_active(&sctx->ds.trace_context)) {
+ u_trace_context_process(&sctx->ds.trace_context, flags & PIPE_FLUSH_END_OF_FRAME);
+ }
} else {
/* Instead of flushing, create a deferred fence. Constraints:
* - the gallium frontend must allow a deferred flush.
#include "util/u_log.h"
#include "util/u_upload_mgr.h"
#include "ac_debug.h"
+#include "si_utrace.h"
void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence)
{
if (ctx->is_noop)
flags |= RADEON_FLUSH_NOOP;
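+ /* Bracket the CS submission with CPU timestamps for the perfetto timeline. */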
+ uint64_t start_ts = 0, submission_id = 0;
+ if (u_trace_perfetto_active(&ctx->ds.trace_context)) {
+ start_ts = si_ds_begin_submit(&ctx->ds_queue);
+ submission_id = ctx->ds_queue.submission_id;
+ }
+
/* Flush the CS. */
ws->cs_flush(cs, flags, &ctx->last_gfx_fence);
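+ /* Close the submit event only if one was actually opened above. */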
+ if (u_trace_perfetto_active(&ctx->ds.trace_context) && start_ts > 0) {
+ si_ds_end_submit(&ctx->ds_queue, start_ts);
+ }
+
tc_driver_internal_flush_notify(ctx->tc);
if (fence)
ws->fence_reference(fence, ctx->last_gfx_fence);
if (ctx->current_saved_cs)
si_saved_cs_reference(&ctx->current_saved_cs, NULL);
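+ /* Hand this CS's trace events off for processing, tagged with the submission id. */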
+ if (u_trace_perfetto_active(&ctx->ds.trace_context))
+ si_utrace_flush(ctx, submission_id);
+
si_begin_new_gfx_cs(ctx, false);
ctx->gfx_flush_in_progress = false;
}
{
bool is_secure = false;
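+ /* A fresh u_trace is created per CS below; free the previous one except on the very first CS. */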
+ if (!first_cs)
+ u_trace_fini(&ctx->trace);
+
if (unlikely(radeon_uses_secure_bos(ctx->ws))) {
is_secure = ctx->ws->cs_is_secure(&ctx->gfx_cs);
assert(!ctx->gfx_cs.prev_dw);
ctx->initial_gfx_cs_size = ctx->gfx_cs.current.cdw;
+ u_trace_init(&ctx->trace, &ctx->ds.trace_context);
/* All buffer references are removed on a flush, so si_check_needs_implicit_sync
* cannot determine if si_make_CB_shader_coherent() needs to be called.
* ctx->force_cb_shader_coherent will be cleared by the first call to
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
uint64_t va = buffer->gpu_address + offset;
si_cp_release_mem(sctx, cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
- EOP_DATA_SEL_TIMESTAMP, NULL, va, 0, PIPE_QUERY_TIMESTAMP);
+ EOP_DATA_SEL_TIMESTAMP, buffer, va, 0, PIPE_QUERY_TIMESTAMP);
}
void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned cp_coher_cntl)
using IncrementalStateType = SIRenderpassIncrementalState;
};
-class SIRenderpassDataSource : public MesaRenderpassDataSource<SIRenderpassDataSource, SIRenderpassTraits> {
+class SIRenderpassDataSource : public MesaRenderpassDataSource<SIRenderpassDataSource,
+ SIRenderpassTraits> {
};
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(SIRenderpassDataSource);
device->sync_gpu_ts = gpu_ts;
device->next_clock_sync_ns = cpu_ts + 1000000000ull;
- MesaRenderpassDataSource<SIRenderpassDataSource, SIRenderpassTraits>::EmitClockSync(ctx, cpu_ts, gpu_ts, device->gpu_clock_id);
+ MesaRenderpassDataSource<SIRenderpassDataSource, SIRenderpassTraits>::
+ EmitClockSync(ctx, cpu_ts, gpu_ts, device->gpu_clock_id);
}
-static void send_descriptors(SIRenderpassDataSource::TraceContext &ctx, struct si_ds_device *device)
+static void send_descriptors(SIRenderpassDataSource::TraceContext &ctx,
+ struct si_ds_device *device)
{
PERFETTO_LOG("Sending renderstage descriptors");
* by si_ds_queue_stage.
*/
char name[100];
- snprintf(name, sizeof(name), "%.10s-%s-%u-%s", util_get_process_name(), queue->name, s, si_queue_stage_desc[s].name);
+ snprintf(name, sizeof(name), "%.10s-%s-%u-%s", util_get_process_name(),
+ queue->name, s, si_queue_stage_desc[s].name);
auto desc = interned_data->add_gpu_specifications();
desc->set_iid(queue->stages[s].queue_iid);
sync_timestamp(ctx, device);
}
-typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *, const void*);
+typedef void (*trace_payload_as_extra_func)(perfetto::protos::pbzero::GpuRenderStageEvent *,
+ const void*);
static void begin_event(struct si_ds_queue *queue, uint64_t ts_ns, enum si_ds_queue_stage stage_id)
{
queue->stages[stage_id].level++;
}
-static void end_event(struct si_ds_queue *queue, uint64_t ts_ns, enum si_ds_queue_stage stage_id, uint32_t submission_id, const char *app_event, const void* payload = nullptr, trace_payload_as_extra_func payload_as_extra = nullptr)
+static void end_event(struct si_ds_queue *queue, uint64_t ts_ns, enum si_ds_queue_stage stage_id,
+ uint32_t submission_id, const char *app_event, const void* payload = nullptr,
+ trace_payload_as_extra_func payload_as_extra = nullptr)
{
PERFETTO_LOG("end event called - ts_ns=%lu", ts_ns);
struct si_ds_device *device = queue->device;
 * stage_iid if not already seen. Otherwise, it's a driver event and we
 * have to use the internal stage_iid.
*/
- uint64_t stage_iid = app_event ? tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event) : stage->stage_iid;
+ uint64_t stage_iid = app_event ?
+ tctx.GetDataSourceLocked()->debug_marker_stage(tctx, app_event) :
+ stage->stage_iid;
auto packet = tctx.NewTracePacket();
si_gpu_tracepoint_config_variable();
}
-void si_ds_device_init(struct si_ds_device *device, const struct radeon_info *devinfo, uint32_t gpu_id, enum amd_ds_api api)
+void si_ds_device_init(struct si_ds_device *device, const struct radeon_info *devinfo,
+ uint32_t gpu_id, enum amd_ds_api api)
{
device->gpu_id = gpu_id;
device->gpu_clock_id = si_pps_clock_id(gpu_id);
u_trace_context_fini(&device->trace_context);
}
-struct si_ds_queue * si_ds_device_init_queue(struct si_ds_device *device, struct si_ds_queue *queue, const char *fmt_name, ...)
+struct si_ds_queue *si_ds_device_init_queue(struct si_ds_device *device,
+ struct si_ds_queue *queue,
+ const char *fmt_name, ...)
{
va_list ap;
queue->device = device;
return queue;
}
-void si_ds_flush_data_init(struct si_ds_flush_data *data, struct si_ds_queue *queue, uint64_t submission_id)
+void si_ds_flush_data_init(struct si_ds_flush_data *data, struct si_ds_queue *queue,
+ uint64_t submission_id)
{
memset(data, 0, sizeof(*data));
#include "util/u_upload_mgr.h"
#include "util/xmlconfig.h"
#include "vl/vl_decoder.h"
+#include "si_utrace.h"
#include <xf86drm.h>
si_destroy_sqtt(sctx);
}
+ si_utrace_fini(sctx);
+
pipe_resource_reference(&sctx->esgs_ring, NULL);
pipe_resource_reference(&sctx->gsvs_ring, NULL);
pipe_resource_reference(&sctx->tess_rings, NULL);
sctx->shader.gs.key.ge.opt.prefer_mono = 1;
}
+ si_utrace_init(sctx);
+
si_begin_new_gfx_cs(sctx, true);
assert(sctx->gfx_cs.current.cdw == sctx->initial_gfx_cs_size);
}
sctx->initial_gfx_cs_size = sctx->gfx_cs.current.cdw;
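+ /* Tracks where the last u_trace timestamp was written (see si_utrace_record_ts). */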
+ sctx->last_timestamp_cmd = NULL;
sctx->cs_blit_shaders = _mesa_hash_table_create_u32_keys(NULL);
if (!sctx->cs_blit_shaders)
break;
}
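+ /* One-time registration of the perfetto data source for this driver. */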
+ si_driver_ds_init();
+
drmFreeVersion(version);
return rw ? rw->screen : NULL;
}
#include "util/u_prim.h"
#include "util/u_upload_mgr.h"
#include "ac_rtld.h"
+#include "si_build_pm4.h"
+#include "si_tracepoints.h"
#if (GFX_VER == 6)
#define GFX(name) name##GFX6
si_need_gfx_cs_space(sctx, num_draws);
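+ /* Record the start of the draw when a perfetto session is active. */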
+ if (u_trace_perfetto_active(&sctx->ds.trace_context))
+ trace_si_begin_draw(&sctx->trace);
+
unsigned instance_count = info->instance_count;
/* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is
zstex->depth_cleared_level_mask &= ~BITFIELD_BIT(sctx->framebuffer.state.zsbuf->u.tex.level);
}
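+ /* Close the draw event with the direct vertex count as payload. */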
+ if (u_trace_perfetto_active(&sctx->ds.trace_context))
+ trace_si_end_draw(&sctx->trace, total_direct_count);
+
DRAW_CLEANUP;
}
#include "util/hash_table.h"
-static void si_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps, unsigned idx, bool end_of_pipe)
+static void si_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps,
+ unsigned idx, bool end_of_pipe)
{
struct si_context *ctx = container_of(trace, struct si_context, trace);
struct pipe_resource *buffer = timestamps;
struct si_resource *ts_bo = si_resource(buffer);
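+ /* If no commands were emitted since the last timestamp, store U_TRACE_NO_TIMESTAMP
+ * so u_trace falls back to the previous value instead of a redundant GPU write. */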
- if (ctx->gfx_cs.current.buf == ctx->last_timestamp_cmd && ctx->gfx_cs.current.cdw == ctx->last_timestamp_cmd_cdw) {
+ if (ctx->gfx_cs.current.buf == ctx->last_timestamp_cmd &&
+ ctx->gfx_cs.current.cdw == ctx->last_timestamp_cmd_cdw) {
uint64_t *ts = si_buffer_map(ctx, ts_bo, PIPE_MAP_READ);
ts[idx] = U_TRACE_NO_TIMESTAMP;
return;
ctx->last_timestamp_cmd_cdw = ctx->gfx_cs.current.cdw;
}
-static uint64_t si_utrace_read_ts(struct u_trace_context *utctx, void *timestamps, unsigned idx, void *flush_data)
+static uint64_t si_utrace_read_ts(struct u_trace_context *utctx, void *timestamps,
+ unsigned idx, void *flush_data)
{
struct si_context *ctx = container_of(utctx, struct si_context, ds.trace_context);
struct pipe_resource *buffer = timestamps;
--- /dev/null
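+# Example capture config: AMD render-stage and mesa track-event sources,
+# 2 s capture streamed to file every 500 ms.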
+buffers {
+ size_kb: 16384
+ fill_policy: RING_BUFFER
+}
+
+data_sources {
+ config {
+ name: "gpu.renderstages.amd"
+ }
+}
+
+data_sources {
+ config {
+ name: "track_event"
+ track_event_config {
+ enabled_categories: "mesa.default"
+ enabled_categories: "mesa.slow"
+ }
+ }
+}
+
+duration_ms: 2000
+write_into_file: true
+file_write_period_ms: 500
+flush_period_ms: 500
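+
+# Usage sketch (assumes the perfetto daemons are already running):
+#   perfetto --txt -c <this-config> -o /tmp/si.perfetto-trace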
data_sources {
config {
+ name: "gpu.renderstages.amd"
+ }
+}
+
+data_sources {
+ config {
name: "track_event"
track_event_config {
#enabled_tags: "slow"