sched/events: Introduce sched_entity load tracking trace event
author    Dietmar Eggemann <dietmar.eggemann@arm.com>
          Mon, 20 Mar 2017 17:26:47 +0000 (17:26 +0000)
committer Douglas RAILLARD <douglas.raillard@arm.com>
          Tue, 14 Aug 2018 15:32:34 +0000 (16:32 +0100)
The following trace event keys are mapped to:

 (1) load     : se->avg.load_avg

 (2) rbl_load : se->avg.runnable_load_avg

 (3) util     : se->avg.util_avg

To let this trace event work for configurations w/ and w/o group
scheduling support for cfs (CONFIG_FAIR_GROUP_SCHED), the following
special handling is necessary for non-existent key=value pairs:

 path = "(null)" : In case of !CONFIG_FAIR_GROUP_SCHED or the
                   sched_entity represents a task.

 comm = "(null)" : In case sched_entity represents a task_group.

 pid = -1        : In case sched_entity represents a task_group.
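
A condensed view of how the new tracepoint's fast-assign path implements this
special handling (taken from the include/trace/events/sched.h hunk below);
path falls back to "(null)" inside __trace_sched_path() when the group cfs_rq
is NULL:

     struct cfs_rq *gcfs_rq = __trace_sched_group_cfs_rq(se);
     struct task_struct *p = gcfs_rq ? NULL
                           : container_of(se, struct task_struct, se);

     memcpy(__entry->comm, p ? p->comm : "(null)", TASK_COMM_LEN);
     __entry->pid = p ? p->pid : -1;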

The following list shows examples of the key=value pairs in different
configurations for:

 (1) a task:

     cpu=0 path=(null) comm=sshd pid=2206 load=102 rbl_load=102 util=102

 (2) a taskgroup:

     cpu=1 path=/tg1/tg11/tg111 comm=(null) pid=-1 load=882 rbl_load=882 util=510

 (3) an autogroup:

     cpu=0 path=/autogroup-13 comm=(null) pid=-1 load=49 rbl_load=49 util=48

 (4) w/o CONFIG_FAIR_GROUP_SCHED:

     cpu=0 path=(null) comm=sshd pid=2211 load=301 rbl_load=301 util=265
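
These samples are emitted whenever the PELT signals of a sched_entity get
updated, i.e. from __update_load_avg_se() and __update_load_avg_blocked_se()
in kernel/sched/pelt.c and from propagate_entity_load_avg() in
kernel/sched/fair.c, e.g. (condensed from the pelt.c hunk below):

     if (___update_load_sum(now, cpu, &se->avg, ...)) {
             ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
             cfs_se_util_change(&se->avg);

             trace_sched_load_se(se);

             return 1;
     }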

The trace event is only defined for CONFIG_SMP since the load tracking
signals in se->avg only exist in this configuration.

The helper functions __trace_sched_cpu() and __trace_sched_path() are
extended to deal with sched_entities as well, and a new helper
__trace_sched_group_cfs_rq() is introduced to return the group cfs_rq
owned by a sched_entity (if any).
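
For a task's sched_entity there is no cfs_rq to derive the rq from, so
__trace_sched_cpu() falls back to the task's CPU (condensed from the
include/trace/events/sched.h hunk below):

     return rq ? cpu_of(rq)
               : task_cpu(container_of(se, struct task_struct, se));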

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
include/trace/events/sched.h
kernel/sched/fair.c
kernel/sched/pelt.c

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 8c79c661772f20d86675bcc34b2b645ed99d89e2..989494d9954161b348319e191926d85f2b59e82d 100644
@@ -576,14 +576,15 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
 #ifdef CONFIG_SMP
 #ifdef CREATE_TRACE_POINTS
 static inline
-int __trace_sched_cpu(struct cfs_rq *cfs_rq)
+int __trace_sched_cpu(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_FAIR_GROUP_SCHED
-       struct rq *rq = cfs_rq->rq;
+       struct rq *rq = cfs_rq ? cfs_rq->rq : NULL;
 #else
-       struct rq *rq = container_of(cfs_rq, struct rq, cfs);
+       struct rq *rq = cfs_rq ? container_of(cfs_rq, struct rq, cfs) : NULL;
 #endif
-       return cpu_of(rq);
+       return rq ? cpu_of(rq)
+                 : task_cpu((container_of(se, struct task_struct, se)));
 }
 
 static inline
@@ -592,18 +593,26 @@ int __trace_sched_path(struct cfs_rq *cfs_rq, char *path, int len)
 #ifdef CONFIG_FAIR_GROUP_SCHED
        int l = path ? len : 0;
 
-       if (task_group_is_autogroup(cfs_rq->tg))
+       if (cfs_rq && task_group_is_autogroup(cfs_rq->tg))
                return autogroup_path(cfs_rq->tg, path, l) + 1;
-       else
+       else if (cfs_rq && cfs_rq->tg->css.cgroup)
                return cgroup_path(cfs_rq->tg->css.cgroup, path, l) + 1;
-#else
+#endif
        if (path)
                strcpy(path, "(null)");
 
        return strlen("(null)");
-#endif
 }
 
+static inline
+struct cfs_rq *__trace_sched_group_cfs_rq(struct sched_entity *se)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       return se->my_q;
+#else
+       return NULL;
+#endif
+}
 #endif /* CREATE_TRACE_POINTS */
 
 /*
@@ -625,7 +634,7 @@ TRACE_EVENT(sched_load_cfs_rq,
        ),
 
        TP_fast_assign(
-               __entry->cpu            = __trace_sched_cpu(cfs_rq);
+               __entry->cpu            = __trace_sched_cpu(cfs_rq, NULL);
                __trace_sched_path(cfs_rq, __get_dynamic_array(path),
                                   __get_dynamic_array_len(path));
                __entry->load           = cfs_rq->avg.load_avg;
@@ -637,6 +646,46 @@ TRACE_EVENT(sched_load_cfs_rq,
                  __entry->cpu, __get_str(path), __entry->load,
                  __entry->rbl_load,__entry->util)
 );
+
+/*
+ * Tracepoint for sched_entity load tracking:
+ */
+TRACE_EVENT(sched_load_se,
+
+       TP_PROTO(struct sched_entity *se),
+
+       TP_ARGS(se),
+
+       TP_STRUCT__entry(
+               __field(        int,            cpu                           )
+               __dynamic_array(char,           path,
+                 __trace_sched_path(__trace_sched_group_cfs_rq(se), NULL, 0) )
+               __array(        char,           comm,   TASK_COMM_LEN         )
+               __field(        pid_t,          pid                           )
+               __field(        unsigned long,  load                          )
+               __field(        unsigned long,  rbl_load                      )
+               __field(        unsigned long,  util                          )
+       ),
+
+       TP_fast_assign(
+               struct cfs_rq *gcfs_rq = __trace_sched_group_cfs_rq(se);
+               struct task_struct *p = gcfs_rq ? NULL
+                                   : container_of(se, struct task_struct, se);
+
+               __entry->cpu            = __trace_sched_cpu(gcfs_rq, se);
+               __trace_sched_path(gcfs_rq, __get_dynamic_array(path),
+                                  __get_dynamic_array_len(path));
+               memcpy(__entry->comm, p ? p->comm : "(null)", TASK_COMM_LEN);
+               __entry->pid = p ? p->pid : -1;
+               __entry->load = se->avg.load_avg;
+               __entry->rbl_load = se->avg.runnable_load_avg;
+               __entry->util = se->avg.util_avg;
+       ),
+
+       TP_printk("cpu=%d path=%s comm=%s pid=%d load=%lu rbl_load=%lu util=%lu",
+                 __entry->cpu, __get_str(path), __entry->comm, __entry->pid,
+                 __entry->load, __entry->rbl_load, __entry->util)
+);
 #endif /* CONFIG_SMP */
 #endif /* _TRACE_SCHED_H */
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8a4abcc469a4406b6c99c2e79a2bd19fb687a40b..3ae1ea8620a46c453479a97e1be218f3df2eaaa5 100644
@@ -3253,6 +3253,7 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
        update_tg_cfs_runnable(cfs_rq, se, gcfs_rq);
 
        trace_sched_load_cfs_rq(cfs_rq);
+       trace_sched_load_se(se);
 
        return 1;
 }
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index f042ee0f7b081780a61bc5b0633508cd1d38f09b..95923939c027353e37e2aa0da4a199aa78b11144 100644
@@ -276,6 +276,9 @@ int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
 
        if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
                ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
+
+               trace_sched_load_se(se);
+
                return 1;
        }
 
@@ -292,6 +295,9 @@ int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_e
 
                ___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
                cfs_se_util_change(&se->avg);
+
+               trace_sched_load_se(se);
+
                return 1;
        }