From 26762423a2664692de2bcccc9de684a5ac105e23 Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Thu, 8 Oct 2020 23:48:46 +0800 Subject: [PATCH] sched/deadline: Optimize sched_dl_global_validate() Under CONFIG_SMP, dl_bw is per root domain, but not per CPU. When checking or updating dl_bw, currently iterating every CPU is overdoing, just need iterate each root domain once. Suggested-by: Peter Zijlstra Signed-off-by: Peng Liu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Acked-by: Juri Lelli Link: https://lkml.kernel.org/r/78d21ee792cc48ff79e8cd62a5f26208463684d6.1602171061.git.iwtbavbm@gmail.com --- kernel/sched/deadline.c | 39 ++++++++++++++++++++++++++++++++------- kernel/sched/sched.h | 9 +++++++++ kernel/sched/topology.c | 1 + 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index f232305..98d96d4 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -97,6 +97,17 @@ static inline unsigned long dl_bw_capacity(int i) return __dl_bw_capacity(i); } } + +static inline bool dl_bw_visited(int cpu, u64 gen) +{ + struct root_domain *rd = cpu_rq(cpu)->rd; + + if (rd->visit_gen == gen) + return true; + + rd->visit_gen = gen; + return false; +} #else static inline struct dl_bw *dl_bw_of(int i) { @@ -112,6 +123,11 @@ static inline unsigned long dl_bw_capacity(int i) { return SCHED_CAPACITY_SCALE; } + +static inline bool dl_bw_visited(int cpu, u64 gen) +{ + return false; +} #endif static inline @@ -2535,11 +2551,15 @@ const struct sched_class dl_sched_class .update_curr = update_curr_dl, }; +/* Used for dl_bw check and update, used under sched_rt_handler()::mutex */ +static u64 dl_generation; + int sched_dl_global_validate(void) { u64 runtime = global_rt_runtime(); u64 period = global_rt_period(); u64 new_bw = to_ratio(period, runtime); + u64 gen = ++dl_generation; struct dl_bw *dl_b; int cpu, ret = 0; unsigned long flags; @@ -2548,13 +2568,13 @@ int sched_dl_global_validate(void) * Here we want to check the bandwidth not being set to some * value smaller than the currently allocated bandwidth in * any of the root_domains. - * - * FIXME: Cycling on all the CPUs is overdoing, but simpler than - * cycling on root_domains... Discussion on different/better - * solutions is welcome! */ for_each_possible_cpu(cpu) { rcu_read_lock_sched(); + + if (dl_bw_visited(cpu, gen)) + goto next; + dl_b = dl_bw_of(cpu); raw_spin_lock_irqsave(&dl_b->lock, flags); @@ -2562,6 +2582,7 @@ int sched_dl_global_validate(void) ret = -EBUSY; raw_spin_unlock_irqrestore(&dl_b->lock, flags); +next: rcu_read_unlock_sched(); if (ret) @@ -2587,6 +2608,7 @@ static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq) void sched_dl_do_global(void) { u64 new_bw = -1; + u64 gen = ++dl_generation; struct dl_bw *dl_b; int cpu; unsigned long flags; @@ -2597,11 +2619,14 @@ void sched_dl_do_global(void) if (global_rt_runtime() != RUNTIME_INF) new_bw = to_ratio(global_rt_period(), global_rt_runtime()); - /* - * FIXME: As above... - */ for_each_possible_cpu(cpu) { rcu_read_lock_sched(); + + if (dl_bw_visited(cpu, gen)) { + rcu_read_unlock_sched(); + continue; + } + dl_b = dl_bw_of(cpu); raw_spin_lock_irqsave(&dl_b->lock, flags); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index df80bfc..49a2dae 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -801,6 +801,15 @@ struct root_domain { struct dl_bw dl_bw; struct cpudl cpudl; + /* + * Indicate whether a root_domain's dl_bw has been checked or + * updated. It's monotonously increasing value. + * + * Also, some corner cases, like 'wrap around' is dangerous, but given + * that u64 is 'big enough'. So that shouldn't be a concern. + */ + u64 visit_gen; + #ifdef HAVE_RT_PUSH_IPI /* * For IPI pull requests, loop across the rto_mask. diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index dd77702..90f3e55 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -516,6 +516,7 @@ static int init_rootdomain(struct root_domain *rd) init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); #endif + rd->visit_gen = 0; init_dl_bw(&rd->dl_bw); if (cpudl_init(&rd->cpudl) != 0) goto free_rto_mask; -- 2.7.4