sched: Add sched_group per-cpu max capacity
author: Morten Rasmussen <morten.rasmussen@arm.com>
Wed, 4 Jul 2018 10:17:41 +0000 (11:17 +0100)
committer: Douglas RAILLARD <douglas.raillard@arm.com>
Tue, 14 Aug 2018 15:32:19 +0000 (16:32 +0100)
The current sg->min_capacity tracks the lowest per-cpu compute capacity
available in the sched_group when rt/irq pressure is taken into account.
Minimum capacity isn't the ideal metric for tracking if a sched_group
needs offloading to another sched_group for some scenarios, e.g. a
sched_group with multiple cpus if only one is under heavy pressure.
Tracking maximum capacity isn't perfect either, but it is a better choice
for some situations, as it indicates that the sched_group is definitely
compute-capacity constrained, either due to rt/irq pressure on all cpus or
due to asymmetric cpu capacities (e.g. big.LITTLE).

cc: Ingo Molnar <mingo@redhat.com>
cc: Peter Zijlstra <peterz@infradead.org>

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
kernel/sched/fair.c
kernel/sched/sched.h
kernel/sched/topology.c

index 1170f902a0168800b1f7c985e79b437914f6e50f..3aa525d7cd506905f5977062efe9f43b8ed8cfc9 100644 (file)
@@ -7769,13 +7769,14 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
        cpu_rq(cpu)->cpu_capacity = capacity;
        sdg->sgc->capacity = capacity;
        sdg->sgc->min_capacity = capacity;
+       sdg->sgc->max_capacity = capacity;
 }
 
 void update_group_capacity(struct sched_domain *sd, int cpu)
 {
        struct sched_domain *child = sd->child;
        struct sched_group *group, *sdg = sd->groups;
-       unsigned long capacity, min_capacity;
+       unsigned long capacity, min_capacity, max_capacity;
        unsigned long interval;
 
        interval = msecs_to_jiffies(sd->balance_interval);
@@ -7789,6 +7790,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 
        capacity = 0;
        min_capacity = ULONG_MAX;
+       max_capacity = 0;
 
        if (child->flags & SD_OVERLAP) {
                /*
@@ -7819,6 +7821,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
                        }
 
                        min_capacity = min(capacity, min_capacity);
+                       max_capacity = max(capacity, max_capacity);
                }
        } else  {
                /*
@@ -7832,12 +7835,14 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 
                        capacity += sgc->capacity;
                        min_capacity = min(sgc->min_capacity, min_capacity);
+                       max_capacity = max(sgc->max_capacity, max_capacity);
                        group = group->next;
                } while (group != child->groups);
        }
 
        sdg->sgc->capacity = capacity;
        sdg->sgc->min_capacity = min_capacity;
+       sdg->sgc->max_capacity = max_capacity;
 }
 
 /*
@@ -7933,16 +7938,27 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
 }
 
 /*
- * group_smaller_cpu_capacity: Returns true if sched_group sg has smaller
+ * group_smaller_min_cpu_capacity: Returns true if sched_group sg has smaller
  * per-CPU capacity than sched_group ref.
  */
 static inline bool
-group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 {
        return sg->sgc->min_capacity * capacity_margin <
                                                ref->sgc->min_capacity * 1024;
 }
 
+/*
+ * group_smaller_max_cpu_capacity: Returns true if sched_group sg has smaller
+ * maximum per-CPU capacity than sched_group ref.
+ */
+static inline bool
+group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+{
+       return sg->sgc->max_capacity * capacity_margin <
+                                               ref->sgc->max_capacity * 1024;
+}
+
 static inline enum
 group_type group_classify(struct sched_group *group,
                          struct sg_lb_stats *sgs)
@@ -8091,7 +8107,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
         * power/energy consequences are not considered.
         */
        if (sgs->sum_nr_running <= sgs->group_weight &&
-           group_smaller_cpu_capacity(sds->local, sg))
+           group_smaller_min_cpu_capacity(sds->local, sg))
                return false;
 
 asym_packing:
index f4da714986d70928bf3e71683e2cc34ea6e80d33..17267ab29987c996cd320050d374a7b3e4fa0b19 100644 (file)
@@ -1220,6 +1220,7 @@ struct sched_group_capacity {
         */
        unsigned long           capacity;
        unsigned long           min_capacity;           /* Min per-CPU capacity in group */
+       unsigned long           max_capacity;           /* Max per-CPU capacity in group */
        unsigned long           next_update;
        int                     imbalance;              /* XXX unrelated to capacity but shared group state */
 
index dcaaca2c0a885d6aff4c748b1f778c0d8e5c165a..0104e12abf726e5aea628819f7740dd32e5b66c6 100644 (file)
@@ -898,6 +898,7 @@ static void init_overlap_sched_group(struct sched_domain *sd,
        sg_span = sched_group_span(sg);
        sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
        sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+       sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
 }
 
 static int
@@ -1057,6 +1058,7 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd)
 
        sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
        sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+       sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
 
        return sg;
 }