#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/string_helpers.h>
+#include <linux/sched/clock.h>
#include <drm/drm_debugfs.h>
return 0;
}
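+/* Dump cumulative usage stats of each ready GPU queue as semicolon-separated
+ * records. Illustrative output (values are examples only):
+ *
+ *   timestamp;105690410368;
+ *   "QUEUE";"JOBS";"RUNTIME";"ACTIVE";
+ *   v3d_render;248;1446496887;0;
+ */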
+static int v3d_debugfs_gpu_usage(struct seq_file *m, void *unused)
+{
+ struct drm_debugfs_entry *entry = m->private;
+ struct drm_device *dev = entry->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_queue_stats *queue_stats;
+ enum v3d_queue queue;
+ u64 timestamp = local_clock();
+ u64 active_runtime;
+
+ seq_printf(m, "timestamp;%llu;\n", local_clock());
+ seq_printf(m, "\"QUEUE\";\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+ for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+ if (!v3d->queue[queue].sched.ready)
+ continue;
+
+ queue_stats = &v3d->gpu_queue_stats[queue];
+ mutex_lock(&queue_stats->lock);
+ v3d_sched_stats_update(queue_stats);
+ if (queue_stats->last_pid)
+ active_runtime = timestamp - queue_stats->last_exec_start;
+ else
+ active_runtime = 0;
+
+ seq_printf(m, "%s;%d;%llu;%c;\n",
+ v3d_queue_to_string(queue),
+ queue_stats->jobs_sent,
+ queue_stats->runtime + active_runtime,
+ queue_stats->last_pid ? '1' : '0');
+ mutex_unlock(&queue_stats->lock);
+ }
+
+ return 0;
+}
+
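+/* Same stats as gpu_usage, but broken down per client PID. Reading this
+ * entry also (re)arms per-process stats collection for the next
+ * V3D_QUEUE_STATS_TIMEOUT period.
+ */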
+static int v3d_debugfs_gpu_pid_usage(struct seq_file *m, void *unused)
+{
+ struct drm_debugfs_entry *entry = m->private;
+ struct drm_device *dev = entry->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_queue_stats *queue_stats;
+ struct v3d_queue_pid_stats *cur;
+ enum v3d_queue queue;
+ u64 active_runtime;
+ u64 timestamp = local_clock();
+
+ seq_printf(m, "timestamp;%llu;\n", timestamp);
+ seq_printf(m, "\"QUEUE\";\"PID\",\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+ for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+ if (!v3d->queue[queue].sched.ready)
+ continue;
+
+ queue_stats = &v3d->gpu_queue_stats[queue];
+ mutex_lock(&queue_stats->lock);
+ queue_stats->gpu_pid_stats_timeout = jiffies + V3D_QUEUE_STATS_TIMEOUT;
+ v3d_sched_stats_update(queue_stats);
+ list_for_each_entry(cur, &queue_stats->pid_stats_list, list) {
+ if (cur->pid == queue_stats->last_pid)
+ active_runtime = timestamp - queue_stats->last_exec_start;
+ else
+ active_runtime = 0;
+
+ seq_printf(m, "%s;%d;%d;%llu;%c;\n",
+ v3d_queue_to_string(queue),
+ cur->pid, cur->jobs_sent,
+ cur->runtime + active_runtime,
+ cur->pid == queue_stats->last_pid ? '1' : '0');
+ }
+ mutex_unlock(&queue_stats->lock);
+ }
+
+ return 0;
+}
+
static int v3d_measure_clock(struct seq_file *m, void *unused)
{
struct drm_debugfs_entry *entry = m->private;
{"v3d_regs", v3d_v3d_debugfs_regs, 0},
{"measure_clock", v3d_measure_clock, 0},
{"bo_stats", v3d_debugfs_bo_stats, 0},
+ {"gpu_usage", v3d_debugfs_gpu_usage, 0},
+ {"gpu_pid_usage", v3d_debugfs_gpu_pid_usage, 0},
};
void
#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
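+/* Map a v3d_queue value to the name used by the gpu_usage and
+ * gpu_pid_usage debugfs entries.
+ */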
+static inline const char *
+v3d_queue_to_string(enum v3d_queue queue)
+{
+ switch (queue) {
+ case V3D_BIN: return "v3d_bin";
+ case V3D_RENDER: return "v3d_render";
+ case V3D_TFU: return "v3d_tfu";
+ case V3D_CSD: return "v3d_csd";
+ case V3D_CACHE_CLEAN: return "v3d_cache_clean";
+ }
+ return "UNKNOWN";
+}
+
struct v3d_queue_state {
struct drm_gpu_scheduler sched;
u64 emit_seqno;
};
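+/* Per-process usage stats of a queue, kept in the queue's pid_stats_list
+ * with the most recently active process at the head.
+ */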
+struct v3d_queue_pid_stats {
+ struct list_head list;
+ u64 runtime;
+ /* Time in jiffies to purge the stats of this process. Every time a
+ * process sends a new job to the queue, this timeout is pushed back
+ * by V3D_QUEUE_STATS_TIMEOUT, as long as the gpu_pid_stats_timeout
+ * of the queue has not been reached.
+ */
+ unsigned long timeout_purge;
+ u32 jobs_sent;
+ pid_t pid;
+};
+
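+/* Usage stats of a queue. Updated under @lock, except for last_exec_end,
+ * which is written directly from the IRQ handlers on job completion.
+ */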
+struct v3d_queue_stats {
+ struct mutex lock;
+ u64 last_exec_start;
+ u64 last_exec_end;
+ u64 runtime;
+ u32 jobs_sent;
+ /* Time in jiffies at which to stop collecting per-process GPU stats.
+ * It is extended by every access to the gpu_pid_usage debugfs
+ * interface. If that interface is not used, per-process stats are not
+ * collected.
+ */
+ unsigned long gpu_pid_stats_timeout;
+ pid_t last_pid;
+ struct list_head pid_stats_list;
+};
+
+/* Per-process stats (v3d_queue_pid_stats) are recorded only if the
+ * gpu_pid_usage debugfs interface has been accessed within the last
+ * V3D_QUEUE_STATS_TIMEOUT (70s).
+ *
+ * The same timeout is used to purge the per-process stats of processes
+ * that have not sent jobs during that period.
+ */
+#define V3D_QUEUE_STATS_TIMEOUT (70 * HZ)
+
/* Performance monitor object. The perform lifetime is controlled by userspace
* using perfmon related ioctls. A perfmon can be attached to a submit_cl
* request, and when this is the case, HW perf counters will be activated just
u32 num_allocated;
u32 pages_allocated;
} bo_stats;
+
+ struct v3d_queue_stats gpu_queue_stats[V3D_MAX_QUEUES];
};
static inline struct v3d_dev *
*/
struct v3d_perfmon *perfmon;
+ /* PID of the process that submitted the job, used for collecting
+ * per-process GPU usage stats.
+ */
+ pid_t client_pid;
+
/* Callback for the freeing of the job on refcount going to 0. */
void (*free)(struct kref *ref);
};
/* v3d_sched.c */
int v3d_sched_init(struct v3d_dev *v3d);
void v3d_sched_fini(struct v3d_dev *v3d);
+void v3d_sched_stats_update(struct v3d_queue_stats *queue_stats);
/* v3d_perfmon.c */
void v3d_perfmon_get(struct v3d_perfmon *perfmon);
job = *container;
job->v3d = v3d;
job->free = free;
+ job->client_pid = current->pid;
ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
v3d_priv);
*/
#include <linux/platform_device.h>
+#include <linux/sched/clock.h>
#include "v3d_drv.h"
#include "v3d_regs.h"
if (intsts & V3D_INT_FLDONE) {
struct v3d_fence *fence =
to_v3d_fence(v3d->bin_job->base.irq_fence);
+ v3d->gpu_queue_stats[V3D_BIN].last_exec_end = local_clock();
trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
if (intsts & V3D_INT_FRDONE) {
struct v3d_fence *fence =
to_v3d_fence(v3d->render_job->base.irq_fence);
+ v3d->gpu_queue_stats[V3D_RENDER].last_exec_end = local_clock();
trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
if (intsts & V3D_INT_CSDDONE) {
struct v3d_fence *fence =
to_v3d_fence(v3d->csd_job->base.irq_fence);
+ v3d->gpu_queue_stats[V3D_CSD].last_exec_end = local_clock();
trace_v3d_csd_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
if (intsts & V3D_HUB_INT_TFUC) {
struct v3d_fence *fence =
to_v3d_fence(v3d->tfu_job->base.irq_fence);
+ v3d->gpu_queue_stats[V3D_TFU].last_exec_end = local_clock();
trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
*/
#include <linux/kthread.h>
+#include <linux/sched/clock.h>
#include "v3d_drv.h"
#include "v3d_regs.h"
v3d_perfmon_start(v3d, job->perfmon);
}
+/*
+ * Updates the runtime stats of a GPU queue for completed jobs.
+ *
+ * It should be called before any new job submission to the queue or before
+ * accessing the stats from the debugfs interface.
+ *
+ * It is expected to be called with queue_stats->lock held.
+ */
+void
+v3d_sched_stats_update(struct v3d_queue_stats *queue_stats)
+{
+ struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+ struct v3d_queue_pid_stats *cur, *tmp;
+ u64 runtime = 0;
+ bool store_pid_stats =
+ time_is_after_jiffies(queue_stats->gpu_pid_stats_timeout);
+
+ /* If the gpu_pid_usage debugfs interface has not been polled during
+ * the last V3D_QUEUE_STATS_TIMEOUT period, per-process stats
+ * collection is stopped and any existing pid_stats are purged.
+ *
+ * pid_stats are also purged for clients that have reached their
+ * timeout_purge, because the process probably does not exist anymore.
+ *
+ * The list is kept in most-recently-used order (the head is updated
+ * in v3d_sched_stats_add_job), so walking it in reverse lets us stop
+ * at the first entry that has not expired.
+ */
+ list_for_each_entry_safe_reverse(cur, tmp, pid_stats_list, list) {
+ if (!store_pid_stats || time_is_before_jiffies(cur->timeout_purge)) {
+ list_del(&cur->list);
+ kfree(cur);
+ } else {
+ break;
+ }
+ }
+ /* If a job has finished, account its runtime in the queue and pid stats. */
+ if (queue_stats->last_pid && queue_stats->last_exec_end) {
+ runtime = queue_stats->last_exec_end -
+ queue_stats->last_exec_start;
+ queue_stats->runtime += runtime;
+
+ if (store_pid_stats) {
+ struct v3d_queue_pid_stats *pid_stats;
+ /* Last job info is always at the head of the list */
+ pid_stats = list_first_entry_or_null(pid_stats_list,
+ struct v3d_queue_pid_stats, list);
+ if (pid_stats &&
+ pid_stats->pid == queue_stats->last_pid) {
+ pid_stats->runtime += runtime;
+ }
+ }
+ queue_stats->last_pid = 0;
+ }
+}
+
+/*
+ * Updates the queue usage adding the information of a new job that is
+ * about to be sent to the GPU to be executed.
+ */
+int
+v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
+ struct drm_sched_job *sched_job)
+{
+ struct v3d_queue_pid_stats *pid_stats = NULL;
+ struct v3d_job *job = to_v3d_job(sched_job);
+ struct v3d_queue_pid_stats *cur;
+ struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+ int ret = 0;
+
+ mutex_lock(&queue_stats->lock);
+
+ /* Completion of previous job requires an update of its runtime stats */
+ v3d_sched_stats_update(queue_stats);
+
+ queue_stats->last_exec_start = local_clock();
+ queue_stats->last_exec_end = 0;
+ queue_stats->jobs_sent++;
+ queue_stats->last_pid = job->client_pid;
+
+ /* gpu usage stats by process are being collected */
+ if (time_is_after_jiffies(queue_stats->gpu_pid_stats_timeout)) {
+ list_for_each_entry(cur, pid_stats_list, list) {
+ if (cur->pid == job->client_pid) {
+ pid_stats = cur;
+ break;
+ }
+ }
+ /* pid_stats of this client is moved to the head of the list. */
+ if (pid_stats) {
+ list_move(&pid_stats->list, pid_stats_list);
+ } else {
+ pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
+ GFP_KERNEL);
+ if (!pid_stats) {
+ ret = -ENOMEM;
+ goto err_mem;
+ }
+ pid_stats->pid = job->client_pid;
+ list_add(&pid_stats->list, pid_stats_list);
+ }
+ pid_stats->jobs_sent++;
+ pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
+ }
+
+err_mem:
+ mutex_unlock(&queue_stats->lock);
+ return ret;
+}
+
static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
{
struct v3d_bin_job *job = to_bin_job(sched_job);
trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
job->start, job->end);
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_BIN], sched_job);
v3d_switch_perfmon(v3d, &job->base);
/* Set the current and end address of the control list.
trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
job->start, job->end);
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_RENDER], sched_job);
v3d_switch_perfmon(v3d, &job->base);
/* XXX: Set the QCFG */
trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_TFU], sched_job);
V3D_WRITE(V3D_TFU_IIA, job->args.iia);
V3D_WRITE(V3D_TFU_IIS, job->args.iis);
V3D_WRITE(V3D_TFU_ICA, job->args.ica);
trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CSD], sched_job);
v3d_switch_perfmon(v3d, &job->base);
for (i = 1; i <= 6; i++)
struct v3d_job *job = to_v3d_job(sched_job);
struct v3d_dev *v3d = job->v3d;
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CACHE_CLEAN],
+ sched_job);
v3d_clean_caches(v3d);
+ v3d->gpu_queue_stats[V3D_CACHE_CLEAN].last_exec_end = local_clock();
return NULL;
}
int hw_jobs_limit = 1;
int job_hang_limit = 0;
int hang_limit_ms = 500;
+ enum v3d_queue q;
int ret;
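+
+ /* Initialise the usage stats of each queue. */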
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ INIT_LIST_HEAD(&v3d->gpu_queue_stats[q].pid_stats_list);
+ /* Setting the timeout before the current jiffies disables
+ * collecting pid_stats at scheduler init.
+ */
+ v3d->gpu_queue_stats[q].gpu_pid_stats_timeout = jiffies - 1;
+ mutex_init(&v3d->gpu_queue_stats[q].lock);
+ }
+
ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
&v3d_bin_sched_ops,
hw_jobs_limit, job_hang_limit,
v3d_sched_fini(struct v3d_dev *v3d)
{
enum v3d_queue q;
+ struct v3d_queue_stats *queue_stats;
for (q = 0; q < V3D_MAX_QUEUES; q++) {
- if (v3d->queue[q].sched.ready)
+ if (v3d->queue[q].sched.ready) {
+ queue_stats = &v3d->gpu_queue_stats[q];
+ mutex_lock(&queue_stats->lock);
+ /* Setting gpu_pid_stats_timeout to jiffies - 1 makes
+ * v3d_sched_stats_update() purge all allocated pid_stats.
+ */
+ queue_stats->gpu_pid_stats_timeout = jiffies - 1;
+ v3d_sched_stats_update(queue_stats);
+ mutex_unlock(&queue_stats->lock);
drm_sched_fini(&v3d->queue[q].sched);
+ }
}
}