From 67f533f836487093a27e176d64de206772088345 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 17 Aug 2013 22:33:35 +0100
Subject: [PATCH] overlay: Accumulate busy times

Still a little too coarse as we add multiple overlapping seqnos and
waits.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 overlay/gpu-perf.c | 75 +++++++++++++++++++++++++++++++++++++++---------------
 overlay/gpu-perf.h | 13 +++++-----
 overlay/overlay.c  | 21 +++++++++++++++
 3 files changed, 83 insertions(+), 26 deletions(-)

diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c
index 653148b..56bc8b0 100644
--- a/overlay/gpu-perf.c
+++ b/overlay/gpu-perf.c
@@ -154,7 +154,7 @@ err:
 	return EINVAL;
 }
 
-static char *get_comm(pid_t pid, char *comm, int len)
+static int get_comm(pid_t pid, char *comm, int len)
 {
 	char filename[1024];
 	int fd;
@@ -168,9 +168,10 @@ static char *get_comm(pid_t pid, char *comm, int len)
 		if (len >= 0)
 			comm[len-1] = '\0';
 		close(fd);
-	}
+	} else
+		len = -1;
 
-	return comm;
+	return len;
 }
 
 static struct gpu_perf_comm *
@@ -178,6 +179,9 @@ lookup_comm(struct gpu_perf *gp, pid_t pid)
 {
 	struct gpu_perf_comm *comm;
 
+	if (pid == 0)
+		return NULL;
+
 	for (comm = gp->comm; comm != NULL; comm = comm->next) {
 		if (comm->pid == pid)
 			break;
@@ -187,10 +191,14 @@ lookup_comm(struct gpu_perf *gp, pid_t pid)
 		if (comm == NULL)
 			return NULL;
 
+		if (get_comm(pid, comm->name, sizeof(comm->name)) < 0) {
+			free(comm);
+			return NULL;
+		}
+
+		comm->pid = pid;
 		comm->next = gp->comm;
 		gp->comm = comm;
-		get_comm(pid, comm->name, sizeof(comm->name));
-		comm->pid = pid;
 	}
 
 	return comm;
@@ -209,13 +217,44 @@ static int request_add(struct gpu_perf *gp, const void *event)
 	return 1;
 }
 
-static int seqno_start(struct gpu_perf *gp, const void *event)
+static int busy_start(struct gpu_perf *gp, const void *event)
 {
+	const struct sample_event *sample = event;
+	struct gpu_perf_comm *comm;
+	struct gpu_perf_time *busy;
+
+	comm = lookup_comm(gp, sample->pid);
+	if (comm == NULL)
+		return 0;
+
+	busy = malloc(sizeof(*busy));
+	if (busy == NULL)
+		return 0;
+
+	busy->seqno = sample->raw[2];
+	busy->time = sample->time;
+	busy->comm = comm;
+	busy->next = gp->busy;
+	gp->busy = busy;
+
 	return 0;
 }
 
-static int seqno_end(struct gpu_perf *gp, const void *event)
+static int busy_end(struct gpu_perf *gp, const void *event)
 {
+	const struct sample_event *sample = event;
+	struct gpu_perf_time *busy, **prev;
+
+	for (prev = &gp->busy; (busy = *prev) != NULL; prev = &busy->next) {
+		if (busy->seqno != sample->raw[2])
+			continue;
+
+		busy->comm->busy_time += sample->time - busy->time;
+		*prev = busy->next;
+		free(busy);
+		return 1;
+	}
+
 	return 0;
 }
 
@@ -229,7 +268,7 @@ static int wait_begin(struct gpu_perf *gp, const void *event)
 {
 	const struct sample_event *sample = event;
 	struct gpu_perf_comm *comm;
-	struct gpu_perf_wait *wait;
+	struct gpu_perf_time *wait;
 
 	comm = lookup_comm(gp, sample->pid);
 	if (comm == NULL)
@@ -239,10 +278,11 @@ static int wait_begin(struct gpu_perf *gp, const void *event)
 	if (wait == NULL)
 		return 0;
 
+	wait->comm = comm;
 	wait->seqno = sample->raw[3];
 	wait->time = sample->time;
-	wait->next = comm->wait;
-	comm->wait = wait;
+	wait->next = gp->wait;
+	gp->wait = wait;
 
 	return 0;
 }
@@ -250,18 +290,13 @@ static int wait_begin(struct gpu_perf *gp, const void *event)
 static int wait_end(struct gpu_perf *gp, const void *event)
 {
 	const struct sample_event *sample = event;
-	struct gpu_perf_comm *comm;
-	struct gpu_perf_wait *wait, **prev;
-
-	comm = lookup_comm(gp, sample->pid);
-	if (comm == NULL)
-		return 0;
+	struct gpu_perf_time *wait, **prev;
 
-	for (prev = &comm->wait; (wait = *prev) != NULL; prev = &wait->next) {
+	for (prev = &gp->wait; (wait = *prev) != NULL; prev = &wait->next) {
 		if (wait->seqno != sample->raw[3])
 			continue;
 
-		comm->wait_time += sample->time - wait->time;
+		wait->comm->wait_time += sample->time - wait->time;
 		*prev = wait->next;
 		free(wait);
 		return 1;
@@ -277,8 +312,8 @@ void gpu_perf_init(struct gpu_perf *gp, unsigned flags)
 	gp->page_size = getpagesize();
 
 	perf_tracepoint_open(gp, "i915", "i915_gem_request_add", request_add);
-	if (perf_tracepoint_open(gp, "i915", "i915_gem_ring_complete", seqno_end) == 0)
-		perf_tracepoint_open(gp, "i915", "i915_gem_ring_dispatch", seqno_start);
+	if (perf_tracepoint_open(gp, "i915", "i915_gem_ring_complete", busy_end) == 0)
+		perf_tracepoint_open(gp, "i915", "i915_gem_ring_dispatch", busy_start);
 	if (perf_tracepoint_open(gp, "i915", "i915_gem_request_wait_begin", wait_begin) == 0)
 		perf_tracepoint_open(gp, "i915", "i915_gem_request_wait_end", wait_end);
 	perf_tracepoint_open(gp, "i915", "i915_flip_complete", flip_complete);
diff --git a/overlay/gpu-perf.h b/overlay/gpu-perf.h
index 476bbaa..ced9379 100644
--- a/overlay/gpu-perf.h
+++ b/overlay/gpu-perf.h
@@ -14,19 +14,20 @@ struct gpu_perf {
 	int flip_complete;
 	struct gpu_perf_comm {
 		struct gpu_perf_comm *next;
-		struct gpu_perf_wait {
-			struct gpu_perf_wait *next;
-			uint32_t seqno;
-			uint64_t time;
-		} *wait;
 		char name[256];
 		pid_t pid;
 		int nr_requests[4];
 		void *user_data;
 
-		uint64_t wait_begin;
 		uint64_t wait_time;
+		uint64_t busy_time;
 	} *comm;
+	struct gpu_perf_time {
+		struct gpu_perf_time *next;
+		struct gpu_perf_comm *comm;
+		uint32_t seqno;
+		uint64_t time;
+	} *wait, *busy;
 };
 
 void gpu_perf_init(struct gpu_perf *gp, unsigned flags);
diff --git a/overlay/overlay.c b/overlay/overlay.c
index bd327ab..4232711 100644
--- a/overlay/overlay.c
+++ b/overlay/overlay.c
@@ -286,6 +286,27 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf *
 			}
 			comm->wait_time = 0;
 		}
+		if (comm->busy_time) {
+			buf[0] = '\0';
+			if (comm->busy_time > 1000*1000) {
+				sprintf(buf, "%s %.1f ms busy",
+					need_comma ? "," : "",
+					comm->busy_time / (1000*1000.));
+			} else if (comm->busy_time > 100) {
+				sprintf(buf, "%s %.1f us busy",
+					need_comma ? "," : "",
+					comm->busy_time / 1000.);
+			} else {
+				sprintf(buf, "%s %.0f ns busy",
+					need_comma ? "," : "",
+					(double)comm->busy_time);
+			}
+			if (buf[0] != '\0') {
+				cairo_show_text(ctx->cr, buf);
+				need_comma = true;
+			}
+			comm->busy_time = 0;
+		}
 		y += 14;
 
 		memset(comm->nr_requests, 0, sizeof(comm->nr_requests));
-- 
2.7.4