From: Vincent Guittot Date: Tue, 19 Oct 2021 12:35:35 +0000 (+0200) Subject: sched/fair: Wait before decaying max_newidle_lb_cost X-Git-Tag: v6.6.17~8991^2~2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e60b56e46b384cee1ad34e6adc164d883049c6c3;p=platform%2Fkernel%2Flinux-rpi.git sched/fair: Wait before decaying max_newidle_lb_cost Decay max_newidle_lb_cost only when it has not been updated for a while and ensure to not decay a recently changed value. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Dietmar Eggemann Acked-by: Mel Gorman Link: https://lore.kernel.org/r/20211019123537.17146-4-vincent.guittot@linaro.org --- diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 2f9166f..c07bfa2 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -105,7 +105,7 @@ struct sched_domain { /* idle_balance() stats */ u64 max_newidle_lb_cost; - unsigned long next_decay_max_lb_cost; + unsigned long last_decay_max_lb_cost; u64 avg_scan_cost; /* select_idle_sibling */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c4c3686..e50fd751 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10239,6 +10239,30 @@ void update_max_interval(void) max_load_balance_interval = HZ*num_online_cpus()/10; } +static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost) +{ + if (cost > sd->max_newidle_lb_cost) { + /* + * Track max cost of a domain to make sure to not delay the + * next wakeup on the CPU. + */ + sd->max_newidle_lb_cost = cost; + sd->last_decay_max_lb_cost = jiffies; + } else if (time_after(jiffies, sd->last_decay_max_lb_cost + HZ)) { + /* + * Decay the newidle max times by ~1% per second to ensure that + * it is not outdated and the current max cost is actually + * shorter. + */ + sd->max_newidle_lb_cost = (sd->max_newidle_lb_cost * 253) / 256; + sd->last_decay_max_lb_cost = jiffies; + + return true; + } + + return false; +} + /* * It checks each scheduling domain to see if it is due to be balanced, * and initiates a balancing operation if so. @@ -10262,14 +10286,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) for_each_domain(cpu, sd) { /* * Decay the newidle max times here because this is a regular - * visit to all the domains. Decay ~1% per second. + * visit to all the domains. */ - if (time_after(jiffies, sd->next_decay_max_lb_cost)) { - sd->max_newidle_lb_cost = - (sd->max_newidle_lb_cost * 253) / 256; - sd->next_decay_max_lb_cost = jiffies + HZ; - need_decay = 1; - } + need_decay = update_newidle_cost(sd, 0); max_cost += sd->max_newidle_lb_cost; /* @@ -10911,8 +10930,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) t1 = sched_clock_cpu(this_cpu); domain_cost = t1 - t0; - if (domain_cost > sd->max_newidle_lb_cost) - sd->max_newidle_lb_cost = domain_cost; + update_newidle_cost(sd, domain_cost); curr_cost += domain_cost; t0 = t1; diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index e812467..30169c7 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1568,7 +1568,7 @@ sd_init(struct sched_domain_topology_level *tl, .last_balance = jiffies, .balance_interval = sd_weight, .max_newidle_lb_cost = 0, - .next_decay_max_lb_cost = jiffies, + .last_decay_max_lb_cost = jiffies, .child = child, #ifdef CONFIG_SCHED_DEBUG .name = tl->name,