unsigned int clamp_value)
{
struct uclamp_map *uc_map = &uclamp_maps[clamp_id][0];
+ struct uclamp_cpu *uc_cpu;
+ int cpu;
+ /* Set clamp group map */
uc_map[group_id].value = clamp_value;
uc_map[group_id].se_count = 0;
+
+ /* Set clamp groups on all CPUs */
+ for_each_possible_cpu(cpu) {
+ uc_cpu = &cpu_rq(cpu)->uclamp;
+ uc_cpu->group[clamp_id][group_id].value = clamp_value;
+ uc_cpu->group[clamp_id][group_id].tasks = 0;
+ }
}
/**
return -ENOSPC;
}
+/**
+ * uclamp_cpu_update: updates the utilization clamp of a CPU
+ * @rq: the rq of the CPU for which the utilization clamp has to be updated
+ * @clamp_id: the clamp index to update
+ *
+ * When tasks are enqueued/dequeued on/from a CPU, the set of currently active
+ * clamp groups is subject to change. Since each clamp group enforces a
+ * different utilization clamp value, once the set of these groups changes it
+ * may be necessary to re-compute the clamp value to apply to that CPU.
+ *
+ * For the specified clamp index, this method computes the new CPU utilization
+ * clamp to use until the next change on the set of RUNNABLE tasks on that CPU.
+ */
+static inline void uclamp_cpu_update(struct rq *rq, int clamp_id)
+{
+ struct uclamp_group *uc_grp = &rq->uclamp.group[clamp_id][0];
+ int max_value = UCLAMP_NOT_VALID;
+ unsigned int group_id;
+
+ for (group_id = 0; group_id <= CONFIG_UCLAMP_GROUPS_COUNT; ++group_id) {
+ /* Ignore inactive clamp groups, i.e. no RUNNABLE tasks */
+ if (!uclamp_group_active(uc_grp, group_id))
+ continue;
+
+ /* Both min and max clamp are MAX aggregated */
+ max_value = max(max_value, uc_grp[group_id].value);
+
+ /* Stop if we reach the max possible clamp */
+ if (max_value >= SCHED_CAPACITY_SCALE)
+ break;
+ }
+ rq->uclamp.value[clamp_id] = max_value;
+}
+
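As a concrete illustration of the aggregation above (all values made up): with two active groups clamped at 200 and 512 and an inactive group at 1024, the loop yields 512 for the CPU; if no group has RUNNABLE tasks at all, the CPU value is reset to UCLAMP_NOT_VALID.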
+/**
+ * uclamp_cpu_get_id(): increase reference count for a clamp group on a CPU
+ * @p: the task being enqueued on a CPU
+ * @rq: the CPU's rq where the clamp group has to be reference counted
+ * @clamp_id: the utilization clamp (e.g. min or max utilization) to reference
+ *
+ * Once a task is enqueued on a CPU's RQ, the clamp group currently defined by
+ * the task's uclamp.group_id is reference counted on that CPU.
+ */
+static inline void uclamp_cpu_get_id(struct task_struct *p,
+ struct rq *rq, int clamp_id)
+{
+ struct uclamp_group *uc_grp;
+ struct uclamp_cpu *uc_cpu;
+ int clamp_value;
+ int group_id;
+
+ /* No task specific clamp values: nothing to do */
+ group_id = p->uclamp[clamp_id].group_id;
+ if (group_id == UCLAMP_NOT_VALID)
+ return;
+
+ /* Reference count the task into its current group_id */
+ uc_grp = &rq->uclamp.group[clamp_id][0];
+ uc_grp[group_id].tasks += 1;
+
+ /*
+ * If this is the new max utilization clamp value, then we can update the
+ * CPU clamp value straight away. Otherwise, the current CPU clamp
+ * value is still valid and we are done.
+ */
+ uc_cpu = &rq->uclamp;
+ clamp_value = p->uclamp[clamp_id].value;
+ if (uc_cpu->value[clamp_id] < clamp_value)
+ uc_cpu->value[clamp_id] = clamp_value;
+}
+
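As a made-up example: if a CPU's util_min clamp is currently 300 and a task whose clamp group value is 512 gets enqueued, the CPU value is raised to 512 on the spot; enqueuing a further task clamped at 200 leaves it untouched, since with MAX aggregation an arriving task can only raise (or preserve) the currently enforced value, never lower it.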
+/**
+ * uclamp_cpu_put_id(): decrease reference count for a clamp group on a CPU
+ * @p: the task being dequeued from a CPU
+ * @rq: the CPU's rq from which the clamp group has to be released
+ * @clamp_id: the utilization clamp (e.g. min or max utilization) to release
+ *
+ * When a task is dequeued from a CPU's RQ, the CPU's clamp group reference
+ * counted by the task is decreased.
+ * If this was the last task defining the current max clamp group, then the
+ * CPU clamping is updated to find the new max for the specified clamp
+ * index.
+ */
+static inline void uclamp_cpu_put_id(struct task_struct *p,
+ struct rq *rq, int clamp_id)
+{
+ struct uclamp_group *uc_grp;
+ struct uclamp_cpu *uc_cpu;
+ int clamp_value;
+ int group_id;
+
+ /* No task specific clamp values: nothing to do */
+ group_id = p->uclamp[clamp_id].group_id;
+ if (group_id == UCLAMP_NOT_VALID)
+ return;
+
+ /* Decrement the task's reference counted group index */
+ uc_grp = &rq->uclamp.group[clamp_id][0];
+#ifdef CONFIG_SCHED_DEBUG
+ if (unlikely(uc_grp[group_id].tasks == 0)) {
+ WARN(1, "invalid CPU[%d] clamp group [%d:%d] refcount\n",
+ cpu_of(rq), clamp_id, group_id);
+ uc_grp[group_id].tasks = 1;
+ }
+#endif
+ uc_grp[group_id].tasks -= 1;
+
+ /* If this is not the last task, no updates are required */
+ if (uc_grp[group_id].tasks > 0)
+ return;
+
+ /*
+ * Update the CPU only if this was the last task of the group
+ * defining the current clamp value.
+ */
+ uc_cpu = &rq->uclamp;
+ clamp_value = uc_grp[group_id].value;
+ if (clamp_value >= uc_cpu->value[clamp_id])
+ uclamp_cpu_update(rq, clamp_id);
+}
+
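Continuing the made-up example: when the task clamped at 512 is dequeued and it was the last one in its group, 512 >= the CPU's current value, so uclamp_cpu_update() rescans the active groups and the CPU value drops back to 300; dequeuing a task whose group value is below the current CPU value never triggers a rescan.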
+/**
+ * uclamp_cpu_get(): increase CPU's clamp group refcount
+ * @rq: the CPU's rq where the clamp group has to be refcounted
+ * @p: the task being enqueued
+ *
+ * Once a task is enqueued on a CPU's rq, all the clamp groups currently
+ * enforced on that task are reference counted on that rq.
+ * Not all scheduling classes have utilization clamping support; their tasks
+ * will be silently ignored.
+ *
+ * This method updates the utilization clamp constraints considering the
+ * requirements for the specified task. Thus, this update must be done before
+ * calling into the scheduling classes, which will eventually update schedutil
+ * considering the new task requirements.
+ */
+static inline void uclamp_cpu_get(struct rq *rq, struct task_struct *p)
+{
+ int clamp_id;
+
+ if (unlikely(!p->sched_class->uclamp_enabled))
+ return;
+
+ for (clamp_id = 0; clamp_id < UCLAMP_CNT; ++clamp_id)
+ uclamp_cpu_get_id(p, rq, clamp_id);
+}
+
+/**
+ * uclamp_cpu_put(): decrease CPU's clamp group refcount
+ * @rq: the CPU's rq where the clamp group refcount has to be decreased
+ * @p: the task being dequeued
+ *
+ * When a task is dequeued from a CPU's rq, the clamp group refcounts which the
+ * task increased at enqueue time are decreased for that CPU.
+ *
+ * This method updates the utilization clamp constraints considering the
+ * requirements for the specified task. Thus, this update must be done before
+ * calling into the scheduling classes, which will eventually update schedutil
+ * considering the new task requirements.
+ */
+static inline void uclamp_cpu_put(struct rq *rq, struct task_struct *p)
+{
+ int clamp_id;
+
+ if (unlikely(!p->sched_class->uclamp_enabled))
+ return;
+
+ for (clamp_id = 0; clamp_id < UCLAMP_CNT; ++clamp_id)
+ uclamp_cpu_put_id(p, rq, clamp_id);
+}
+
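To make the enqueue/dequeue flow above concrete, here is a stand-alone, single-CPU user-space sketch; it is not part of the patch, and the group IDs, clamp values and simplified helpers (enqueue(), dequeue(), cpu_update()) are all made up for illustration:

/*
 * Stand-alone sketch: per-group refcounting with MAX aggregation on one CPU.
 */
#include <stdio.h>

#define GROUPS		3
#define NOT_VALID	-1	/* mimics UCLAMP_NOT_VALID */

struct group { int value; int tasks; };

static struct group grp[GROUPS] = {
	{ .value =  200 },	/* group 0 */
	{ .value =  512 },	/* group 1 */
	{ .value = 1024 },	/* group 2, never used below */
};
static int cpu_value = NOT_VALID;

/* Rescan all active groups, as uclamp_cpu_update() does */
static void cpu_update(void)
{
	int max_value = NOT_VALID;
	int i;

	for (i = 0; i < GROUPS; i++) {
		if (grp[i].tasks > 0 && grp[i].value > max_value)
			max_value = grp[i].value;
	}
	cpu_value = max_value;
}

static void enqueue(int group_id)
{
	grp[group_id].tasks++;
	/* An arriving task can only raise the MAX aggregated value */
	if (cpu_value < grp[group_id].value)
		cpu_value = grp[group_id].value;
}

static void dequeue(int group_id)
{
	grp[group_id].tasks--;
	/* Rescan only if the departing group could define the current max */
	if (grp[group_id].tasks == 0 && grp[group_id].value >= cpu_value)
		cpu_update();
}

int main(void)
{
	enqueue(0); printf("after enqueue(0): %d\n", cpu_value);	/*  200 */
	enqueue(1); printf("after enqueue(1): %d\n", cpu_value);	/*  512 */
	enqueue(1); printf("after enqueue(1): %d\n", cpu_value);	/*  512 */
	dequeue(1); printf("after dequeue(1): %d\n", cpu_value);	/*  512 */
	dequeue(1); printf("after dequeue(1): %d\n", cpu_value);	/*  200 */
	dequeue(0); printf("after dequeue(0): %d\n", cpu_value);	/*   -1 */
	return 0;
}

Note how dequeue() triggers a full rescan only when the departing task was the last one in its group and that group's value could have been defining the current maximum, mirroring the checks in uclamp_cpu_put_id().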
/**
* uclamp_group_put: decrease the reference count for a clamp group
* @clamp_id: the clamp index which was affected by a task group
static void __init init_uclamp(void)
{
int clamp_id;
+ int cpu;
mutex_init(&uclamp_mutex);
+ /* Init CPU's clamp groups */
+ for_each_possible_cpu(cpu) {
+ struct uclamp_cpu *uc_cpu = &cpu_rq(cpu)->uclamp;
+
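+ /*
+ * UCLAMP_NOT_VALID is expected to be -1: memset() then fills every
+ * byte with 0xff, so all the int fields of struct uclamp_cpu read
+ * back as "not valid".
+ */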
+ memset(uc_cpu, UCLAMP_NOT_VALID, sizeof(struct uclamp_cpu));
+ }
+
/* Init SE's clamp map */
for (clamp_id = 0; clamp_id < UCLAMP_CNT; ++clamp_id) {
struct uclamp_map *uc_map = &uclamp_maps[clamp_id][0];
}
#else /* CONFIG_UCLAMP_TASK */
+static inline void uclamp_cpu_get(struct rq *rq, struct task_struct *p) { }
+static inline void uclamp_cpu_put(struct rq *rq, struct task_struct *p) { }
static inline int __setscheduler_uclamp(struct task_struct *p,
const struct sched_attr *attr)
{
if (!(flags & ENQUEUE_RESTORE))
sched_info_queued(rq, p);
+ uclamp_cpu_get(rq, p);
p->sched_class->enqueue_task(rq, p, flags);
}
if (!(flags & DEQUEUE_SAVE))
sched_info_dequeued(rq, p);
+ uclamp_cpu_put(rq, p);
p->sched_class->dequeue_task(rq, p, flags);
}
#endif
#endif /* CONFIG_SMP */
+#ifdef CONFIG_UCLAMP_TASK
+/**
+ * struct uclamp_group - Utilization clamp Group
+ * @value: utilization clamp value for tasks on this clamp group
+ * @tasks: number of RUNNABLE tasks on this clamp group
+ *
+ * Keep track of how many tasks are RUNNABLE for a given utilization
+ * clamp value.
+ */
+struct uclamp_group {
+ int value;
+ int tasks;
+};
+
+/**
+ * struct uclamp_cpu - CPU's utilization clamp
+ * @value: currently active clamp values for a CPU
+ * @group: utilization clamp groups affecting a CPU
+ *
+ * Keep track of the RUNNABLE tasks on a CPU to aggregate their clamp values.
+ * A clamp value affects a CPU if there is at least one RUNNABLE (or actually
+ * running) task with that value on that CPU.
+ *
+ * We have up to UCLAMP_CNT possible different clamp values, which are
+ * currently only two: minimum utilization and maximum utilization.
+ *
+ * All utilization clamping values are MAX aggregated, since:
+ * - for util_min: we want to run the CPU at least at the max of the minimum
+ * utilization required by its currently RUNNABLE tasks.
+ * - for util_max: we want to allow the CPU to run up to the max of the
+ * maximum utilization allowed by its currently RUNNABLE tasks.
+ *
+ * Since on each system we expect only a limited number of different
+ * utilization clamp values (CONFIG_UCLAMP_GROUPS_COUNT), we use a simple
+ * array to track the metrics required to compute all the per-CPU utilization
+ * clamp values. The additional slot is used to track the default clamp
+ * values, i.e. no min/max clamping at all.
+ */
+struct uclamp_cpu {
+ int value[UCLAMP_CNT];
+ struct uclamp_group group[UCLAMP_CNT][CONFIG_UCLAMP_GROUPS_COUNT + 1];
+};
+#endif /* CONFIG_UCLAMP_TASK */
+
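For a rough idea of the per-CPU footprint (not part of the patch; CONFIG_UCLAMP_GROUPS_COUNT == 5 is just a made-up example value, and 4-byte ints with no padding are assumed):

/* Stand-alone sizing sketch for the per-CPU clamp data */
#include <stdio.h>

#define UCLAMP_CNT			2	/* min and max clamps */
#define CONFIG_UCLAMP_GROUPS_COUNT	5	/* made-up example value */

struct uclamp_group { int value; int tasks; };

struct uclamp_cpu {
	int value[UCLAMP_CNT];
	struct uclamp_group group[UCLAMP_CNT][CONFIG_UCLAMP_GROUPS_COUNT + 1];
};

int main(void)
{
	/* 2*4 + 2*6*8 = 104 bytes per CPU with the assumptions above */
	printf("per-CPU uclamp data: %zu bytes\n", sizeof(struct uclamp_cpu));
	return 0;
}

The "+ 1" slot per clamp index is the additional slot described above, reserved for the default case of no min/max clamping at all.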
/*
* This is the main, per-CPU runqueue data structure.
*
unsigned long nr_load_updates;
u64 nr_switches;
+#ifdef CONFIG_UCLAMP_TASK
+ /* Utilization clamp values based on CPU's RUNNABLE tasks */
+ struct uclamp_cpu uclamp ____cacheline_aligned;
+#endif
+
struct cfs_rq cfs;
struct rt_rq rt;
struct dl_rq dl;
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p, int type);
#endif
+
+#ifdef CONFIG_UCLAMP_TASK
+ int uclamp_enabled;
+#endif
};
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+#ifdef CONFIG_UCLAMP_TASK
+/**
+ * uclamp_group_active: check if a clamp group is active on a CPU
+ * @uc_grp: the clamp groups for a CPU
+ * @group_id: the clamp group to check
+ *
+ * A clamp group affects a CPU if it has at least one RUNNABLE task.
+ *
+ * Return: true if the specified CPU has at least one RUNNABLE task
+ * for the specified clamp group.
+ */
+static inline bool uclamp_group_active(struct uclamp_group *uc_grp,
+ int group_id)
+{
+ return uc_grp[group_id].tasks > 0;
+}
+#endif /* CONFIG_UCLAMP_TASK */
+
#ifdef CONFIG_CPU_FREQ
DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);