sched: Initialize energy data structures
authorDietmar Eggemann <dietmar.eggemann@arm.com>
Fri, 14 Nov 2014 16:20:20 +0000 (16:20 +0000)
committerLukasz Luba <l.luba@partner.samsung.com>
Mon, 10 Sep 2018 08:20:36 +0000 (10:20 +0200)
The sched_group_energy (sge) pointer of the first sched_group (sg) in
the sched_domain (sd) is initialized to point to the appropriate (in
terms of sd level and cpu) sge data defined in the arch and so to the
correct part of the Energy Model (EM).

Energy-aware scheduling allows that a system has only EM data up to a
certain sd level (so called highest energy aware balancing sd level).
A check in init_sched_energy() enforces that all sd's below this sd
level contain EM data.

The 'int cpu' parameter of sched_domain_energy_f requires that
check_sched_energy_data() makes sure that all cpus spanned by a sg
are provisioned with the same EM data.

This patch has also been tested with feature FORCE_SD_OVERLAP enabled.

cc: Ingo Molnar <mingo@redhat.com>
cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
kernel/sched/topology.c

index 519b024f4e94f9385e2e7df913dba2fb314410d1..9e3be1f8f0c8da49d174f5c449a2b4fc8cf1c23c 100644 (file)
@@ -948,6 +948,73 @@ next:
        update_group_capacity(sd, cpu);
 }
 
+#define energy_eff(e, n) \
+    ((e->cap_states[n].cap << SCHED_CAPACITY_SHIFT)/e->cap_states[n].power)
+
+static void init_sched_groups_energy(int cpu, struct sched_domain *sd,
+                                    sched_domain_energy_f fn)
+{
+       struct sched_group *sg = sd->groups;
+       const struct sched_group_energy *sge;
+       int i;
+
+       if (!(fn && fn(cpu)))
+               return;
+
+       if (cpu != group_balance_cpu(sg))
+               return;
+
+       if (sd->flags & SD_OVERLAP) {
+               pr_err("BUG: EAS does not support overlapping sd spans\n");
+#ifdef CONFIG_SCHED_DEBUG
+               pr_err("     the %s domain has SD_OVERLAP set\n", sd->name);
+#endif
+               return;
+       }
+
+       if (sd->child && !sd->child->groups->sge) {
+               pr_err("BUG: EAS setup borken for CPU%d\n", cpu);
+#ifdef CONFIG_SCHED_DEBUG
+               pr_err("     energy data on %s but not on %s domain\n",
+                       sd->name, sd->child->name);
+#endif
+               return;
+       }
+
+       sge = fn(cpu);
+
+       /*
+        * Check that the per-cpu provided sd energy data is consistent for all
+        * cpus within the mask.
+        */
+       if (cpumask_weight(sched_group_span(sg)) > 1) {
+               struct cpumask mask;
+
+               cpumask_xor(&mask, sched_group_span(sg), get_cpu_mask(cpu));
+
+               for_each_cpu(i, &mask)
+                       BUG_ON(sge != fn(i));
+       }
+
+       /* Check that energy efficiency (capacity/power) is monotonically
+        * decreasing in the capacity state vector with higher indexes
+        */
+       for (i = 0; i < (sge->nr_cap_states - 1); i++) {
+               if (energy_eff(sge, i) > energy_eff(sge, i+1))
+                       continue;
+#ifdef CONFIG_SCHED_DEBUG
+               pr_warn("WARN: cpu=%d, domain=%s: incr. energy eff %lu[%d]->%lu[%d]\n",
+                       cpu, sd->name, energy_eff(sge, i), i,
+                       energy_eff(sge, i+1), i+1);
+#else
+               pr_warn("WARN: cpu=%d: incr. energy eff %lu[%d]->%lu[%d]\n",
+                       cpu, energy_eff(sge, i), i, energy_eff(sge, i+1), i+1);
+#endif
+       }
+
+       sd->groups->sge = fn(cpu);
+}
+
 /*
  * Initializers for schedule domains
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
@@ -1689,10 +1756,13 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 
        /* Calculate CPU capacity for physical packages and nodes */
        for (i = nr_cpumask_bits-1; i >= 0; i--) {
+               struct sched_domain_topology_level *tl = sched_domain_topology;
+
                if (!cpumask_test_cpu(i, cpu_map))
                        continue;
 
-               for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+               for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent, tl++) {
+                       init_sched_groups_energy(i, sd, tl->energy);
                        claim_allocations(i, sd);
                        init_sched_groups_capacity(i, sd);
                }