sched/fair: Wait before decaying max_newidle_lb_cost
author: Vincent Guittot <vincent.guittot@linaro.org>
Tue, 19 Oct 2021 12:35:35 +0000 (14:35 +0200)
committer: Peter Zijlstra <peterz@infradead.org>
Sun, 31 Oct 2021 10:11:38 +0000 (11:11 +0100)
Decay max_newidle_lb_cost only when it has not been updated for a while,
and make sure that a recently changed value is not decayed.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lore.kernel.org/r/20211019123537.17146-4-vincent.guittot@linaro.org
include/linux/sched/topology.h
kernel/sched/fair.c
kernel/sched/topology.c

index 2f9166f6dec8a7496782ecffb4da04f4b0004646..c07bfa2d80f29c104a26d92403e718931574837c 100644 (file)
@@ -105,7 +105,7 @@ struct sched_domain {
 
        /* idle_balance() stats */
        u64 max_newidle_lb_cost;
-       unsigned long next_decay_max_lb_cost;
+       unsigned long last_decay_max_lb_cost;
 
        u64 avg_scan_cost;              /* select_idle_sibling */
 
index c4c36865321b75ef1d8c2809d88f464641eb7863..e50fd751e1dfa0c55f218d2dc779d10e4813786c 100644 (file)
@@ -10239,6 +10239,30 @@ void update_max_interval(void)
        max_load_balance_interval = HZ*num_online_cpus()/10;
 }
 
+static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost)
+{
+       if (cost > sd->max_newidle_lb_cost) {
+               /*
+                * Track max cost of a domain to make sure to not delay the
+                * next wakeup on the CPU.
+                */
+               sd->max_newidle_lb_cost = cost;
+               sd->last_decay_max_lb_cost = jiffies;
+       } else if (time_after(jiffies, sd->last_decay_max_lb_cost + HZ)) {
+               /*
+                * Decay the newidle max times by ~1% per second to ensure that
+                * it is not outdated and the current max cost is actually
+                * shorter.
+                */
+               sd->max_newidle_lb_cost = (sd->max_newidle_lb_cost * 253) / 256;
+               sd->last_decay_max_lb_cost = jiffies;
+
+               return true;
+       }
+
+       return false;
+}
+
 /*
  * It checks each scheduling domain to see if it is due to be balanced,
  * and initiates a balancing operation if so.
@@ -10262,14 +10286,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
        for_each_domain(cpu, sd) {
                /*
                 * Decay the newidle max times here because this is a regular
-                * visit to all the domains. Decay ~1% per second.
+                * visit to all the domains.
                 */
-               if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
-                       sd->max_newidle_lb_cost =
-                               (sd->max_newidle_lb_cost * 253) / 256;
-                       sd->next_decay_max_lb_cost = jiffies + HZ;
-                       need_decay = 1;
-               }
+               need_decay = update_newidle_cost(sd, 0);
                max_cost += sd->max_newidle_lb_cost;
 
                /*
@@ -10911,8 +10930,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 
                        t1 = sched_clock_cpu(this_cpu);
                        domain_cost = t1 - t0;
-                       if (domain_cost > sd->max_newidle_lb_cost)
-                               sd->max_newidle_lb_cost = domain_cost;
+                       update_newidle_cost(sd, domain_cost);
 
                        curr_cost += domain_cost;
                        t0 = t1;
index e81246787560abf75004ec63c01729e945020677..30169c7685b641da3ed14b93697a7dec9b7c06f7 100644 (file)
@@ -1568,7 +1568,7 @@ sd_init(struct sched_domain_topology_level *tl,
                .last_balance           = jiffies,
                .balance_interval       = sd_weight,
                .max_newidle_lb_cost    = 0,
-               .next_decay_max_lb_cost = jiffies,
+               .last_decay_max_lb_cost = jiffies,
                .child                  = child,
 #ifdef CONFIG_SCHED_DEBUG
                .name                   = tl->name,