#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/string_helpers.h>
+#include <linux/sched/clock.h>
#include <drm/drm_debugfs.h>
return 0;
}
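+/* Dump cumulative usage stats of each ready GPU queue as semicolon-separated
+ * records. Illustrative output (values are examples only):
+ *
+ *   timestamp;105690410368;
+ *   "QUEUE";"JOBS";"RUNTIME";"ACTIVE";
+ *   v3d_render;248;1446496887;0;
+ */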
+static int v3d_debugfs_gpu_usage(struct seq_file *m, void *unused)
+{
+ struct drm_debugfs_entry *entry = m->private;
+ struct drm_device *dev = entry->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_queue_stats *queue_stats;
+ enum v3d_queue queue;
+ u64 timestamp = local_clock();
+ u64 active_runtime;
+
+ seq_printf(m, "timestamp;%llu;\n", local_clock());
+ seq_printf(m, "\"QUEUE\";\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+ for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+ if (!v3d->queue[queue].sched.ready)
+ continue;
+
+ queue_stats = &v3d->gpu_queue_stats[queue];
+ mutex_lock(&queue_stats->lock);
+ v3d_sched_stats_update(queue_stats);
+ if (queue_stats->last_pid)
+ active_runtime = timestamp - queue_stats->last_exec_start;
+ else
+ active_runtime = 0;
+
+ seq_printf(m, "%s;%d;%llu;%c;\n",
+ v3d_queue_to_string(queue),
+ queue_stats->jobs_sent,
+ queue_stats->runtime + active_runtime,
+ queue_stats->last_pid ? '1' : '0');
+ mutex_unlock(&queue_stats->lock);
+ }
+
+ return 0;
+}
+
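+/* Same stats as gpu_usage, but broken down per client PID. Reading this
+ * entry also (re)arms per-process stats collection for the next
+ * V3D_QUEUE_STATS_TIMEOUT period.
+ */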
+static int v3d_debugfs_gpu_pid_usage(struct seq_file *m, void *unused)
+{
+ struct drm_debugfs_entry *entry = m->private;
+ struct drm_device *dev = entry->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_queue_stats *queue_stats;
+ struct v3d_queue_pid_stats *cur;
+ enum v3d_queue queue;
+ u64 active_runtime;
+ u64 timestamp = local_clock();
+
+ seq_printf(m, "timestamp;%llu;\n", timestamp);
+ seq_printf(m, "\"QUEUE\";\"PID\",\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+ for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+ if (!v3d->queue[queue].sched.ready)
+ continue;
+
+ queue_stats = &v3d->gpu_queue_stats[queue];
+ mutex_lock(&queue_stats->lock);
+ queue_stats->gpu_pid_stats_timeout = jiffies + V3D_QUEUE_STATS_TIMEOUT;
+ v3d_sched_stats_update(queue_stats);
+ list_for_each_entry(cur, &queue_stats->pid_stats_list, list) {
+ if (cur->pid == queue_stats->last_pid)
+ active_runtime = timestamp - queue_stats->last_exec_start;
+ else
+ active_runtime = 0;
+
+ seq_printf(m, "%s;%d;%d;%llu;%c;\n",
+ v3d_queue_to_string(queue),
+ cur->pid, cur->jobs_sent,
+ cur->runtime + active_runtime,
+ cur->pid == queue_stats->last_pid ? '1' : '0');
+ }
+ mutex_unlock(&queue_stats->lock);
+ }
+
+ return 0;
+}
+
static int v3d_measure_clock(struct seq_file *m, void *unused)
{
struct drm_debugfs_entry *entry = m->private;
{"v3d_regs", v3d_v3d_debugfs_regs, 0},
{"measure_clock", v3d_measure_clock, 0},
{"bo_stats", v3d_debugfs_bo_stats, 0},
+ {"gpu_usage", v3d_debugfs_gpu_usage, 0},
+ {"gpu_pid_usage", v3d_debugfs_gpu_pid_usage, 0},
};
void
#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
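+/* Map a v3d_queue value to the name used by the gpu_usage and
+ * gpu_pid_usage debugfs entries.
+ */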
+static inline const char *
+v3d_queue_to_string(enum v3d_queue queue)
+{
+ switch (queue) {
+ case V3D_BIN: return "v3d_bin";
+ case V3D_RENDER: return "v3d_render";
+ case V3D_TFU: return "v3d_tfu";
+ case V3D_CSD: return "v3d_csd";
+ case V3D_CACHE_CLEAN: return "v3d_cache_clean";
+ }
+ return "UNKNOWN";
+}
+
struct v3d_queue_state {
struct drm_gpu_scheduler sched;
u64 emit_seqno;
};
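+/* Per-process usage stats of a queue, kept in the queue's pid_stats_list
+ * with the most recently active process at the head.
+ */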
+struct v3d_queue_pid_stats {
+ struct list_head list;
+ u64 runtime;
+ /* Time in jiffies to purge the stats of this process. Every time a
+ * process sends a new job to the queue, this timeout is pushed back
+ * by V3D_QUEUE_STATS_TIMEOUT, as long as the gpu_pid_stats_timeout
+ * of the queue has not been reached.
+ */
+ unsigned long timeout_purge;
+ u32 jobs_sent;
+ pid_t pid;
+};
+
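+/* Usage stats of a queue. Updated under @lock, except for last_exec_end,
+ * which is written directly from the IRQ handlers on job completion.
+ */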
+struct v3d_queue_stats {
+ struct mutex lock;
+ u64 last_exec_start;
+ u64 last_exec_end;
+ u64 runtime;
+ u32 jobs_sent;
+ /* Time in jiffies at which to stop collecting per-process GPU stats.
+ * It is extended by every access to the gpu_pid_usage debugfs
+ * interface. If that interface is not used, per-process stats are not
+ * collected.
+ */
+ unsigned long gpu_pid_stats_timeout;
+ pid_t last_pid;
+ struct list_head pid_stats_list;
+};
+
+/* Per-process stats (v3d_queue_pid_stats) are recorded only if the
+ * gpu_pid_usage debugfs interface has been accessed within the last
+ * V3D_QUEUE_STATS_TIMEOUT (70s).
+ *
+ * The same timeout is used to purge the per-process stats of processes
+ * that have not sent jobs during that period.
+ */
+#define V3D_QUEUE_STATS_TIMEOUT (70 * HZ)
+
/* Performance monitor object. The perform lifetime is controlled by userspace
* using perfmon related ioctls. A perfmon can be attached to a submit_cl
* request, and when this is the case, HW perf counters will be activated just
u32 num_allocated;
u32 pages_allocated;
} bo_stats;
+
+ struct v3d_queue_stats gpu_queue_stats[V3D_MAX_QUEUES];
};
static inline struct v3d_dev *
*/
struct v3d_perfmon *perfmon;
+ /* PID of the process that submitted the job, used for collecting
+ * per-process GPU usage stats.
+ */
+ pid_t client_pid;
+
/* Callback for the freeing of the job on refcount going to 0. */
void (*free)(struct kref *ref);
};
/* v3d_sched.c */
int v3d_sched_init(struct v3d_dev *v3d);
void v3d_sched_fini(struct v3d_dev *v3d);
+void v3d_sched_stats_update(struct v3d_queue_stats *queue_stats);
/* v3d_perfmon.c */
void v3d_perfmon_get(struct v3d_perfmon *perfmon);
job = *container;
job->v3d = v3d;
job->free = free;
+ job->client_pid = current->pid;
ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
v3d_priv);
*/
#include <linux/platform_device.h>
+#include <linux/sched/clock.h>
#include "v3d_drv.h"
#include "v3d_regs.h"
if (intsts & V3D_INT_FLDONE) {
struct v3d_fence *fence =
to_v3d_fence(v3d->bin_job->base.irq_fence);
+ v3d->gpu_queue_stats[V3D_BIN].last_exec_end = local_clock();
trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
if (intsts & V3D_INT_FRDONE) {
struct v3d_fence *fence =
to_v3d_fence(v3d->render_job->base.irq_fence);
+ v3d->gpu_queue_stats[V3D_RENDER].last_exec_end = local_clock();
trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
if (intsts & V3D_INT_CSDDONE) {
struct v3d_fence *fence =
to_v3d_fence(v3d->csd_job->base.irq_fence);
+ v3d->gpu_queue_stats[V3D_CSD].last_exec_end = local_clock();
trace_v3d_csd_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
if (intsts & V3D_HUB_INT_TFUC) {
struct v3d_fence *fence =
to_v3d_fence(v3d->tfu_job->base.irq_fence);
+ v3d->gpu_queue_stats[V3D_TFU].last_exec_end = local_clock();
trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
dma_fence_signal(&fence->base);
*/
#include <linux/kthread.h>
+#include <linux/sched/clock.h>
#include "v3d_drv.h"
#include "v3d_regs.h"
v3d_perfmon_start(v3d, job->perfmon);
}
+/*
+ * Updates the runtime stats of a GPU queue for completed jobs.
+ *
+ * It should be called before any new job submission to the queue or before
+ * accessing the stats from the debugfs interface.
+ *
+ * It is expected to be called with queue_stats->lock held.
+ */
+void
+v3d_sched_stats_update(struct v3d_queue_stats *queue_stats)
+{
+ struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+ struct v3d_queue_pid_stats *cur, *tmp;
+ u64 runtime = 0;
+ bool store_pid_stats =
+ time_is_after_jiffies(queue_stats->gpu_pid_stats_timeout);
+
+ /* If the gpu_pid_usage debugfs interface has not been polled during
+ * the last V3D_QUEUE_STATS_TIMEOUT period, per-process stats
+ * collection is stopped and any existing pid_stats are purged.
+ *
+ * pid_stats are also purged for clients that have reached their
+ * timeout_purge, because the process probably does not exist anymore.
+ *
+ * The list is kept in most-recently-used order (the head is updated
+ * in v3d_sched_stats_add_job), so walking it in reverse lets us stop
+ * at the first entry that has not expired.
+ */
+ list_for_each_entry_safe_reverse(cur, tmp, pid_stats_list, list) {
+ if (!store_pid_stats || time_is_before_jiffies(cur->timeout_purge)) {
+ list_del(&cur->list);
+ kfree(cur);
+ } else {
+ break;
+ }
+ }
+ /* If a job has finished, account its runtime in the queue and pid stats. */
+ if (queue_stats->last_pid && queue_stats->last_exec_end) {
+ runtime = queue_stats->last_exec_end -
+ queue_stats->last_exec_start;
+ queue_stats->runtime += runtime;
+
+ if (store_pid_stats) {
+ struct v3d_queue_pid_stats *pid_stats;
+ /* Last job info is always at the head of the list */
+ pid_stats = list_first_entry_or_null(pid_stats_list,
+ struct v3d_queue_pid_stats, list);
+ if (pid_stats &&
+ pid_stats->pid == queue_stats->last_pid) {
+ pid_stats->runtime += runtime;
+ }
+ }
+ queue_stats->last_pid = 0;
+ }
+}
+
+/*
+ * Updates the queue usage adding the information of a new job that is
+ * about to be sent to the GPU to be executed.
+ */
+int
+v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
+ struct drm_sched_job *sched_job)
+{
+ struct v3d_queue_pid_stats *pid_stats = NULL;
+ struct v3d_job *job = to_v3d_job(sched_job);
+ struct v3d_queue_pid_stats *cur;
+ struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+ int ret = 0;
+
+ mutex_lock(&queue_stats->lock);
+
+ /* Completion of previous job requires an update of its runtime stats */
+ v3d_sched_stats_update(queue_stats);
+
+ queue_stats->last_exec_start = local_clock();
+ queue_stats->last_exec_end = 0;
+ queue_stats->jobs_sent++;
+ queue_stats->last_pid = job->client_pid;
+
+ /* gpu usage stats by process are being collected */
+ if (time_is_after_jiffies(queue_stats->gpu_pid_stats_timeout)) {
+ list_for_each_entry(cur, pid_stats_list, list) {
+ if (cur->pid == job->client_pid) {
+ pid_stats = cur;
+ break;
+ }
+ }
+ /* pid_stats of this client is moved to the head of the list. */
+ if (pid_stats) {
+ list_move(&pid_stats->list, pid_stats_list);
+ } else {
+ pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
+ GFP_KERNEL);
+ if (!pid_stats) {
+ ret = -ENOMEM;
+ goto err_mem;
+ }
+ pid_stats->pid = job->client_pid;
+ list_add(&pid_stats->list, pid_stats_list);
+ }
+ pid_stats->jobs_sent++;
+ pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
+ }
+
+err_mem:
+ mutex_unlock(&queue_stats->lock);
+ return ret;
+}
+
static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
{
struct v3d_bin_job *job = to_bin_job(sched_job);
trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
job->start, job->end);
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_BIN], sched_job);
v3d_switch_perfmon(v3d, &job->base);
/* Set the current and end address of the control list.
trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
job->start, job->end);
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_RENDER], sched_job);
v3d_switch_perfmon(v3d, &job->base);
/* XXX: Set the QCFG */
trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_TFU], sched_job);
V3D_WRITE(V3D_TFU_IIA, job->args.iia);
V3D_WRITE(V3D_TFU_IIS, job->args.iis);
V3D_WRITE(V3D_TFU_ICA, job->args.ica);
trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CSD], sched_job);
v3d_switch_perfmon(v3d, &job->base);
for (i = 1; i <= 6; i++)
struct v3d_job *job = to_v3d_job(sched_job);
struct v3d_dev *v3d = job->v3d;
+ v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CACHE_CLEAN],
+ sched_job);
v3d_clean_caches(v3d);
+ v3d->gpu_queue_stats[V3D_CACHE_CLEAN].last_exec_end = local_clock();
return NULL;
}
int hw_jobs_limit = 1;
int job_hang_limit = 0;
int hang_limit_ms = 500;
+ enum v3d_queue q;
int ret;
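+
+ /* Initialise the usage stats of each queue. */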
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ INIT_LIST_HEAD(&v3d->gpu_queue_stats[q].pid_stats_list);
+ /* Setting the timeout before the current jiffies disables
+ * collecting pid_stats at scheduler init.
+ */
+ v3d->gpu_queue_stats[q].gpu_pid_stats_timeout = jiffies - 1;
+ mutex_init(&v3d->gpu_queue_stats[q].lock);
+ }
+
ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
&v3d_bin_sched_ops,
hw_jobs_limit, job_hang_limit,
v3d_sched_fini(struct v3d_dev *v3d)
{
enum v3d_queue q;
+ struct v3d_queue_stats *queue_stats;
for (q = 0; q < V3D_MAX_QUEUES; q++) {
- if (v3d->queue[q].sched.ready)
+ if (v3d->queue[q].sched.ready) {
+ queue_stats = &v3d->gpu_queue_stats[q];
+ mutex_lock(&queue_stats->lock);
+ /* Setting gpu_pid_stats_timeout to jiffies - 1 makes
+ * v3d_sched_stats_update() purge all allocated pid_stats.
+ */
+ queue_stats->gpu_pid_stats_timeout = jiffies - 1;
+ v3d_sched_stats_update(queue_stats);
+ mutex_unlock(&queue_stats->lock);
drm_sched_fini(&v3d->queue[q].sched);
+ }
}
}