sched/fair: Energy-aware wake-up task placement
author Morten Rasmussen <morten.rasmussen@arm.com>
Wed, 30 Mar 2016 13:29:48 +0000 (14:29 +0100)
committer Lukasz Luba <l.luba@partner.samsung.com>
Mon, 10 Sep 2018 08:21:01 +0000 (10:21 +0200)
When the system is not overutilized, place waking tasks on the most
energy efficient cpu. Previous attempts reduced the search space by
matching task utilization to cpu capacity before consulting the energy
model as this is an expensive operation. The search heuristics didn't
work very well, and lacking any better alternatives, this patch takes the
brute-force route and tries all potential targets.

This approach doesn't scale, but it might be sufficient for many
embedded applications while work is continuing on a heuristic that can
minimize the necessary computations. The heuristic must be derived from
the platform energy model rather than make additional assumptions, such
as that lower capacity implies better energy efficiency. PeterZ mentioned in the
past that we might be able to derive some simpler deciding functions
using mathematical (modal?) analysis.

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
kernel/sched/fair.c

index 39b012641cb74fbf1abb48d262cc162e5b6bfb69..2bd9979387ec18dd3ccc4b0798e0b558b9b3bdee 100644 (file)
@@ -6557,6 +6557,60 @@ static bool cpu_overutilized(int cpu)
 static inline bool nohz_kick_needed(struct rq *rq, bool only_update);
 static void nohz_balancer_kick(bool only_update);
 
+static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu)
+{
+       int i;
+       int min_diff = 0, energy_cpu = prev_cpu, spare_cpu = prev_cpu;
+       unsigned long max_spare = 0;
+       struct sched_domain *sd;
+       /* Brute force: try each allowed CPU in prev_cpu's sd_ea span. */
+       rcu_read_lock();
+       /* sd is only used between rcu_read_lock() and rcu_read_unlock(). */
+       sd = rcu_dereference(per_cpu(sd_ea, prev_cpu));
+       /* No sd_ea domain: skip the search; both picks stay at prev_cpu. */
+       if (!sd)
+               goto unlock;
+       /* Candidates: CPUs in both p->cpus_allowed and the domain span. */
+       for_each_cpu_and(i, &p->cpus_allowed, sched_domain_span(sd)) {
+               int diff;
+               unsigned long spare;
+               /* NOTE(review): presumably models moving p from prev_cpu to i. */
+               struct energy_env eenv = {
+                       .util_delta     = task_util(p),
+                       .src_cpu        = prev_cpu,
+                       .dst_cpu        = i,
+               };
+
+               spare = capacity_spare_wake(i, p);
+               /* prev_cpu is the default result, not a candidate. */
+               if (i == prev_cpu)
+                       continue;
+               /* Remember the CPU with the largest spare value seen. */
+               if (spare > max_spare) {
+                       max_spare = spare;
+                       spare_cpu = i;
+               }
+               /* Require spare to cover task_util(p) * capacity_margin / 1024. */
+               if (spare * 1024 < capacity_margin * task_util(p))
+                       continue;
+
+               diff = energy_diff(&eenv);
+               /* min_diff starts at 0, so only a negative diff is accepted. */
+               if (diff < min_diff) {
+                       min_diff = diff;
+                       energy_cpu = i;
+               }
+       }
+
+unlock:
+       rcu_read_unlock();
+       /* Nothing picked and prev_cpu is not overutilized: stay put. */
+       if (energy_cpu == prev_cpu && !cpu_overutilized(prev_cpu))
+               return prev_cpu;
+       /* Prefer the energy pick; otherwise use the max-spare CPU. */
+       return energy_cpu != prev_cpu ? energy_cpu : spare_cpu;
+}
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -6584,6 +6638,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
                              && cpumask_test_cpu(cpu, &p->cpus_allowed);
        }
 
+       if (energy_aware() && !(cpu_rq(prev_cpu)->rd->overutilized))
+               return select_energy_cpu_brute(p, prev_cpu);
+
        rcu_read_lock();
        for_each_domain(cpu, tmp) {
                if (!(tmp->flags & SD_LOAD_BALANCE))