#if defined(__i386__)
#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
#endif
#if defined(__x86_64__)
#define rmb() asm volatile("lfence" ::: "memory")
+#define wmb() asm volatile("sfence" ::: "memory")
#endif
#define N_PAGES 32
return 1;
}
+static int ctx_switch(struct gpu_perf *gp, const void *event)
+{
+ const struct sample_event *sample = event;
+
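+	/* raw[1] is assumed to select the ring in the tracepoint's raw payload; it must fit ctx_switch[] */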
+ gp->ctx_switch[sample->raw[1]]++;
+ return 1;
+}
+
static int ring_sync(struct gpu_perf *gp, const void *event)
{
const struct sample_event *sample = event;
perf_tracepoint_open(gp, "i915", "i915_gem_request_wait_end", wait_end);
perf_tracepoint_open(gp, "i915", "i915_flip_complete", flip_complete);
perf_tracepoint_open(gp, "i915", "i915_gem_ring_sync_to", ring_sync);
+ perf_tracepoint_open(gp, "i915", "i915_gem_ring_switch_context", ctx_switch);
if (gp->nr_events == 0) {
gp->error = "i915.ko tracepoints not available";
return;
}
-static int process_sample(struct gpu_perf *gp,
+static int process_sample(struct gpu_perf *gp, int cpu,
const struct perf_event_header *header)
{
const struct sample_event *sample = (const struct sample_event *)header;
int n, update = 0;
/* hash me! */
- for (n = 0; n < gp->nr_cpus * gp->nr_events; n++) {
- if (gp->sample[n].id != sample->id)
+ for (n = 0; n < gp->nr_events; n++) {
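+		/* counters are stored event-major: nr_cpus entries per event */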
+ int m = n * gp->nr_cpus + cpu;
+ if (gp->sample[m].id != sample->id)
continue;
- update = 1;
- if (gp->sample[n].func)
- update = gp->sample[n].func(gp, sample);
+ update = gp->sample[m].func(gp, sample);
break;
}

	return update;
}
if (header->type == PERF_RECORD_SAMPLE)
- update += process_sample(gp, header);
+ update += process_sample(gp, n, header);
tail += header->size;
}
if (wrap)
tail &= mask;
mmap->data_tail = tail;
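+	/* store barrier: publish the consumed tail to the kernel */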
+ wmb();
}
free(buffer);
struct overlay_gpu_perf {
struct gpu_perf gpu_perf;
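+	/* last frame on which context switches were reported */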
+ time_t show_ctx;
};
struct overlay_gpu_freq {
cairo_t *cr;
int width, height;
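+	/* wall-clock second of the current frame, set once per redraw */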
+ time_t time;
+
struct overlay_gpu_top gpu_top;
struct overlay_gpu_perf gpu_perf;
struct overlay_gpu_freq gpu_freq;
struct overlay_gpu_perf *gp)
{
gpu_perf_init(&gp->gpu_perf, 0);
+
+ gp->show_ctx = 0;
}
static char *get_comm(pid_t pid, char *comm, int len)
char buf[1024];
cairo_pattern_t *linear;
int x, y, y1, y2, n;
+ int has_ctx = 0;
+
+ gpu_perf_update(&gp->gpu_perf);
+
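+	/* find the highest ring with any switches so only active counters are printed */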
+ for (n = 4; n > 0; n--) {
+ if (gp->gpu_perf.ctx_switch[n-1]) {
+ has_ctx = n;
+ break;
+ }
+ }
cairo_rectangle(ctx->cr, ctx->width/2+HALF_PAD-.5, PAD-.5, ctx->width/2-SIZE_PAD+1, ctx->height/2-SIZE_PAD+1);
cairo_set_source_rgb(ctx->cr, .15, .15, .15);
return;
}
- gpu_perf_update(&gp->gpu_perf);
-
y = PAD + 12 - 2;
x = ctx->width/2 + HALF_PAD;
-
for (comm = gp->gpu_perf.comm; comm; comm = comm->next) {
int total;
chart_draw(comm->user_data, ctx->cr);
y2 += 14;
}
+ if (has_ctx || gp->show_ctx)
+ y2 += 14;
y1 += -12 - 2;
y2 += 14 - 14 + 4;
cairo_move_to(ctx->cr, x, y);
cairo_show_text(ctx->cr, buf);
y += 14;
+
+ cairo_set_source_rgba(ctx->cr, 1, 1, 1, 1);
+ cairo_move_to(ctx->cr, x, y);
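+	/* draw the per-ring context-switch counts; a zero line lingers for 10s */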
+ if (has_ctx) {
+ int len = sprintf(buf, "Contexts:");
+ for (n = 0; n < has_ctx; n++)
+ len += sprintf(buf + len, "%s %d",
+ n ? "," : "",
+ gp->gpu_perf.ctx_switch[n]);
+
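+		/* counts are per frame: reset once formatted */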
+ memset(gp->gpu_perf.ctx_switch, 0, sizeof(gp->gpu_perf.ctx_switch));
+ gp->show_ctx = ctx->time;
+
+ cairo_show_text(ctx->cr, buf);
+ y += 14;
+ } else if (gp->show_ctx) {
+ cairo_show_text(ctx->cr, "Contexts: 0");
+ y += 14;
+ if (ctx->time - gp->show_ctx > 10)
+ gp->show_ctx = 0;
+ }
}
static void init_gpu_freq(struct overlay_context *ctx,
i = 0;
while (1) {
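+		/* sample the wall clock once per frame; used for the context-display timeout */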
+ ctx.time = time(NULL);
+
ctx.cr = cairo_create(ctx.surface);
cairo_set_operator(ctx.cr, CAIRO_OPERATOR_CLEAR);
cairo_paint(ctx.cr);