Merge branch 'sched/warnings' into sched/core, to pick up WARN_ON_ONCE() conversion...

author Ingo Molnar <mingo@kernel.org>

Tue, 30 Aug 2022 08:27:33 +0000 (10:27 +0200)

committer Ingo Molnar <mingo@kernel.org>

Tue, 30 Aug 2022 08:28:15 +0000 (10:28 +0200)
author Ingo Molnar <mingo@kernel.org>
Tue, 30 Aug 2022 08:27:33 +0000 (10:27 +0200)
committer Ingo Molnar <mingo@kernel.org>
Tue, 30 Aug 2022 08:28:15 +0000 (10:28 +0200)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 813687a..7d289d8 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -481,8 +481,7 @@ sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
   *                             p->se.load, p->rt_priority,
   *                             p->dl.dl_{runtime, deadline, period, flags, bw, density}
   *  - sched_setnuma():         p->numa_preferred_nid
- *  - sched_move_task()/
- *    cpu_cgroup_fork():       p->sched_task_group
+ *  - sched_move_task():       p->sched_task_group
   *  - uclamp_update_active()   p->uclamp*
   *
   * p->state <- TASK_*:
@@ -8861,7 +8860,7 @@ void sched_show_task(struct task_struct *p)
         if (pid_alive(p))
                 ppid = task_pid_nr(rcu_dereference(p->real_parent));
         rcu_read_unlock();
-       pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n",
+       pr_cont(" stack:%-5lu pid:%-5d ppid:%-6d flags:0x%08lx\n",
                 free, task_pid_nr(p), ppid,
                 read_task_thread_flags(p));
  
@@ -9601,9 +9600,6 @@ LIST_HEAD(task_groups);
  static struct kmem_cache *task_group_cache __read_mostly;
  #endif
  
-DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
-DECLARE_PER_CPU(cpumask_var_t, select_rq_mask);
-
  void __init sched_init(void)
  {
         unsigned long ptr = 0;
@@ -9647,14 +9643,6 @@ void __init sched_init(void)
  
  #endif /* CONFIG_RT_GROUP_SCHED */
         }
-#ifdef CONFIG_CPUMASK_OFFSTACK
-       for_each_possible_cpu(i) {
-               per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
-                       cpumask_size(), GFP_KERNEL, cpu_to_node(i));
-               per_cpu(select_rq_mask, i) = (cpumask_var_t)kzalloc_node(
-                       cpumask_size(), GFP_KERNEL, cpu_to_node(i));
-       }
-#endif /* CONFIG_CPUMASK_OFFSTACK */
  
         init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
  
@@ -10163,7 +10151,7 @@ void sched_release_group(struct task_group *tg)
         spin_unlock_irqrestore(&task_group_lock, flags);
  }
  
-static void sched_change_group(struct task_struct *tsk, int type)
+static void sched_change_group(struct task_struct *tsk)
  {
         struct task_group *tg;
  
@@ -10179,7 +10167,7 @@ static void sched_change_group(struct task_struct *tsk, int type)
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
         if (tsk->sched_class->task_change_group)
-               tsk->sched_class->task_change_group(tsk, type);
+               tsk->sched_class->task_change_group(tsk);
         else
  #endif
                 set_task_rq(tsk, task_cpu(tsk));
@@ -10210,7 +10198,7 @@ void sched_move_task(struct task_struct *tsk)
         if (running)
                 put_prev_task(rq, tsk);
  
-       sched_change_group(tsk, TASK_MOVE_GROUP);
+       sched_change_group(tsk);
  
         if (queued)
                 enqueue_task(rq, tsk, queue_flags);
@@ -10288,53 +10276,19 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
         sched_unregister_group(tg);
  }
  
-/*
- * This is called before wake_up_new_task(), therefore we really only
- * have to set its group bits, all the other stuff does not apply.
- */
-static void cpu_cgroup_fork(struct task_struct *task)
-{
-       struct rq_flags rf;
-       struct rq *rq;
-
-       rq = task_rq_lock(task, &rf);
-
-       update_rq_clock(rq);
-       sched_change_group(task, TASK_SET_GROUP);
-
-       task_rq_unlock(rq, task, &rf);
-}
-
+#ifdef CONFIG_RT_GROUP_SCHED
  static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
  {
         struct task_struct *task;
         struct cgroup_subsys_state *css;
-       int ret = 0;
  
         cgroup_taskset_for_each(task, css, tset) {
-#ifdef CONFIG_RT_GROUP_SCHED
                 if (!sched_rt_can_attach(css_tg(css), task))
                         return -EINVAL;
-#endif
-               /*
-                * Serialize against wake_up_new_task() such that if it's
-                * running, we're sure to observe its full state.
-                */
-               raw_spin_lock_irq(&task->pi_lock);
-               /*
-                * Avoid calling sched_move_task() before wake_up_new_task()
-                * has happened. This would lead to problems with PELT, due to
-                * move wanting to detach+attach while we're not attached yet.
-                */
-               if (READ_ONCE(task->__state) == TASK_NEW)
-                       ret = -EINVAL;
-               raw_spin_unlock_irq(&task->pi_lock);
-
-               if (ret)
-                       break;
         }
-       return ret;
+       return 0;
  }
+#endif
  
  static void cpu_cgroup_attach(struct cgroup_taskset *tset)
  {
@@ -11170,8 +11124,9 @@ struct cgroup_subsys cpu_cgrp_subsys = {
         .css_released   = cpu_cgroup_css_released,
         .css_free       = cpu_cgroup_css_free,
         .css_extra_stat_show = cpu_extra_stat_show,
-       .fork           = cpu_cgroup_fork,
+#ifdef CONFIG_RT_GROUP_SCHED
         .can_attach     = cpu_cgroup_can_attach,
+#endif
         .attach         = cpu_cgroup_attach,
         .legacy_cftypes = cpu_legacy_files,
         .dfl_cftypes    = cpu_files,
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c

index 93878cb..1ec807f 100644 (file)
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -205,7 +205,7 @@ int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
         default:
                 err = -EINVAL;
                 goto out;
-       };
+       }
  
         if (type == PIDTYPE_PID) {
                 __sched_core_set(task, cookie);
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c

index 02d970a..57c92d7 100644 (file)
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -123,7 +123,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
                 unsigned long cap, max_cap = 0;
                 int cpu, max_cpu = -1;
  
-               if (!static_branch_unlikely(&sched_asym_cpucapacity))
+               if (!sched_asym_cpucap_active())
                         return 1;
  
                 /* Ensure the capacity of the CPUs fits the task. */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c

index 962b169..d116d2b 100644 (file)
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -124,15 +124,12 @@ static inline int dl_bw_cpus(int i)
         return cpus;
  }
  
-static inline unsigned long __dl_bw_capacity(int i)
+static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
  {
-       struct root_domain *rd = cpu_rq(i)->rd;
         unsigned long cap = 0;
+       int i;
  
-       RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
-                        "sched RCU must be held");
-
-       for_each_cpu_and(i, rd->span, cpu_active_mask)
+       for_each_cpu_and(i, mask, cpu_active_mask)
                 cap += capacity_orig_of(i);
  
         return cap;
@@ -144,11 +141,14 @@ static inline unsigned long __dl_bw_capacity(int i)
   */
  static inline unsigned long dl_bw_capacity(int i)
  {
-       if (!static_branch_unlikely(&sched_asym_cpucapacity) &&
+       if (!sched_asym_cpucap_active() &&
             capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
                 return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
         } else {
-               return __dl_bw_capacity(i);
+               RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+                                "sched RCU must be held");
+
+               return __dl_bw_capacity(cpu_rq(i)->rd->span);
         }
  }
  
@@ -1333,11 +1333,7 @@ static void update_curr_dl(struct rq *rq)
  
         trace_sched_stat_runtime(curr, delta_exec, 0);
  
-       curr->se.sum_exec_runtime += delta_exec;
-       account_group_exec_runtime(curr, delta_exec);
-
-       curr->se.exec_start = now;
-       cgroup_account_cputime(curr, delta_exec);
+       update_current_exec_runtime(curr, now, delta_exec);
  
         if (dl_entity_is_special(dl_se))
                 return;
@@ -1849,7 +1845,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
          * Take the capacity of the CPU into account to
          * ensure it fits the requirement of the task.
          */
-       if (static_branch_unlikely(&sched_asym_cpucapacity))
+       if (sched_asym_cpucap_active())
                 select_rq |= !dl_task_fits_capacity(p, cpu);
  
         if (select_rq) {
@@ -3007,17 +3003,15 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
  int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
                                  const struct cpumask *trial)
  {
-       int ret = 1, trial_cpus;
+       unsigned long flags, cap;
         struct dl_bw *cur_dl_b;
-       unsigned long flags;
+       int ret = 1;
  
         rcu_read_lock_sched();
         cur_dl_b = dl_bw_of(cpumask_any(cur));
-       trial_cpus = cpumask_weight(trial);
-
+       cap = __dl_bw_capacity(trial);
         raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
-       if (cur_dl_b->bw != -1 &&
-           cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw)
+       if (__dl_overflow(cur_dl_b, cap, 0, 0))
                 ret = 0;
         raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
         rcu_read_unlock_sched();
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 28f10dc..efceb67 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -799,8 +799,6 @@ void init_entity_runnable_average(struct sched_entity *se)
         /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
  }
  
-static void attach_entity_cfs_rq(struct sched_entity *se);
-
  /*
   * With new tasks being created, their initial util_avgs are extrapolated
   * based on the cfs_rq's current util_avg:
@@ -835,20 +833,6 @@ void post_init_entity_util_avg(struct task_struct *p)
         long cpu_scale = arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq)));
         long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2;
  
-       if (cap > 0) {
-               if (cfs_rq->avg.util_avg != 0) {
-                       sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
-                       sa->util_avg /= (cfs_rq->avg.load_avg + 1);
-
-                       if (sa->util_avg > cap)
-                               sa->util_avg = cap;
-               } else {
-                       sa->util_avg = cap;
-               }
-       }
-
-       sa->runnable_avg = sa->util_avg;
-
         if (p->sched_class != &fair_sched_class) {
                 /*
                  * For !fair tasks do:
@@ -864,7 +848,19 @@ void post_init_entity_util_avg(struct task_struct *p)
                 return;
         }
  
-       attach_entity_cfs_rq(se);
+       if (cap > 0) {
+               if (cfs_rq->avg.util_avg != 0) {
+                       sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
+                       sa->util_avg /= (cfs_rq->avg.load_avg + 1);
+
+                       if (sa->util_avg > cap)
+                               sa->util_avg = cap;
+               } else {
+                       sa->util_avg = cap;
+               }
+       }
+
+       sa->runnable_avg = sa->util_avg;
  }
  
  #else /* !CONFIG_SMP */
@@ -3838,8 +3834,7 @@ static void migrate_se_pelt_lag(struct sched_entity *se) {}
   * @cfs_rq: cfs_rq to update
   *
   * The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
- * avg. The immediate corollary is that all (fair) tasks must be attached, see
- * post_init_entity_util_avg().
+ * avg. The immediate corollary is that all (fair) tasks must be attached.
   *
   * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
   *
@@ -4003,6 +3998,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
  #define UPDATE_TG      0x1
  #define SKIP_AGE_LOAD  0x2
  #define DO_ATTACH      0x4
+#define DO_DETACH      0x8
  
  /* Update task and its cfs_rq load average */
  static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -4032,6 +4028,13 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
                 attach_entity_load_avg(cfs_rq, se);
                 update_tg_load_avg(cfs_rq);
  
+       } else if (flags & DO_DETACH) {
+               /*
+                * DO_DETACH means we're here from dequeue_entity()
+                * and we are migrating task out of the CPU.
+                */
+               detach_entity_load_avg(cfs_rq, se);
+               update_tg_load_avg(cfs_rq);
         } else if (decayed) {
                 cfs_rq_util_change(cfs_rq, 0);
  
@@ -4064,8 +4067,8 @@ static void remove_entity_load_avg(struct sched_entity *se)
  
         /*
          * tasks cannot exit without having gone through wake_up_new_task() ->
-        * post_init_entity_util_avg() which will have added things to the
-        * cfs_rq, so we can remove unconditionally.
+        * enqueue_task_fair() which will have added things to the cfs_rq,
+        * so we can remove unconditionally.
          */
  
         sync_entity_load_avg(se);
@@ -4262,7 +4265,7 @@ static inline int task_fits_capacity(struct task_struct *p,
  
  static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
  {
-       if (!static_branch_unlikely(&sched_asym_cpucapacity))
+       if (!sched_asym_cpucap_active())
                 return;
  
         if (!p || p->nr_cpus_allowed == 1) {
@@ -4292,6 +4295,7 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
  #define UPDATE_TG      0x0
  #define SKIP_AGE_LOAD  0x0
  #define DO_ATTACH      0x0
+#define DO_DETACH      0x0
  
  static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int not_used1)
  {
@@ -4434,7 +4438,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         /*
          * When enqueuing a sched_entity, we must:
          *   - Update loads to have both entity and cfs_rq synced with now.
-        *   - Add its load to cfs_rq->runnable_avg
+        *   - For group_entity, update its runnable_weight to reflect the new
+        *     h_nr_running of its group cfs_rq.
          *   - For group_entity, update its weight to reflect the new share of
          *     its group cfs_rq
          *   - Add its new weight to cfs_rq->load.weight
@@ -4511,6 +4516,11 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
  static void
  dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
  {
+       int action = UPDATE_TG;
+
+       if (entity_is_task(se) && task_on_rq_migrating(task_of(se)))
+               action |= DO_DETACH;
+
         /*
          * Update run-time statistics of the 'current'.
          */
@@ -4519,12 +4529,13 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         /*
          * When dequeuing a sched_entity, we must:
          *   - Update loads to have both entity and cfs_rq synced with now.
-        *   - Subtract its load from the cfs_rq->runnable_avg.
+        *   - For group_entity, update its runnable_weight to reflect the new
+        *     h_nr_running of its group cfs_rq.
          *   - Subtract its previous weight from cfs_rq->load.weight.
          *   - For group entity, update its weight to reflect the new share
          *     of its group cfs_rq.
          */
-       update_load_avg(cfs_rq, se, UPDATE_TG);
+       update_load_avg(cfs_rq, se, action);
         se_update_runnable(se);
  
         update_stats_dequeue_fair(cfs_rq, se, flags);
@@ -5893,8 +5904,8 @@ dequeue_throttle:
  #ifdef CONFIG_SMP
  
  /* Working cpumask for: load_balance, load_balance_newidle. */
-DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
-DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
+static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
  
  #ifdef CONFIG_NO_HZ_COMMON
  
@@ -6506,7 +6517,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
  
  static inline bool asym_fits_capacity(unsigned long task_util, int cpu)
  {
-       if (static_branch_unlikely(&sched_asym_cpucapacity))
+       if (sched_asym_cpucap_active())
                 return fits_capacity(task_util, capacity_of(cpu));
  
         return true;
@@ -6526,7 +6537,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
          * On asymmetric system, update task utilization because we will check
          * that the task fits with cpu's capacity.
          */
-       if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+       if (sched_asym_cpucap_active()) {
                 sync_entity_load_avg(&p->se);
                 task_util = uclamp_task_util(p);
         }
@@ -6580,7 +6591,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
          * For asymmetric CPU capacity systems, our domain of interest is
          * sd_asym_cpucapacity rather than sd_llc.
          */
-       if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+       if (sched_asym_cpucap_active()) {
                 sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
                 /*
                  * On an asymmetric CPU capacity system where an exclusive
@@ -7076,8 +7087,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
         return new_cpu;
  }
  
-static void detach_entity_cfs_rq(struct sched_entity *se);
-
  /*
   * Called immediately before a task is migrated to a new CPU; task_cpu(p) and
   * cfs_rq_of(p) references at time of call are still valid and identify the
@@ -7099,15 +7108,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
                 se->vruntime -= u64_u32_load(cfs_rq->min_vruntime);
         }
  
-       if (p->on_rq == TASK_ON_RQ_MIGRATING) {
-               /*
-                * In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
-                * rq->lock and can modify state directly.
-                */
-               lockdep_assert_rq_held(task_rq(p));
-               detach_entity_cfs_rq(se);
-
-       } else {
+       if (!task_on_rq_migrating(p)) {
                 remove_entity_load_avg(se);
  
                 /*
@@ -10916,8 +10917,7 @@ static bool update_nohz_stats(struct rq *rq)
   * can be a simple update of blocked load or a complete load balance with
   * tasks movement depending of flags.
   */
-static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
-                              enum cpu_idle_type idle)
+static void _nohz_idle_balance(struct rq *this_rq, unsigned int flags)
  {
         /* Earliest time when we have to do rebalance again */
         unsigned long now = jiffies;
@@ -11032,7 +11032,7 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
         if (idle != CPU_IDLE)
                 return false;
  
-       _nohz_idle_balance(this_rq, flags, idle);
+       _nohz_idle_balance(this_rq, flags);
  
         return true;
  }
@@ -11052,7 +11052,7 @@ void nohz_run_idle_balance(int cpu)
          * (ie NOHZ_STATS_KICK set) and will do the same.
          */
         if ((flags == NOHZ_NEWILB_KICK) && !need_resched())
-               _nohz_idle_balance(cpu_rq(cpu), NOHZ_STATS_KICK, CPU_IDLE);
+               _nohz_idle_balance(cpu_rq(cpu), NOHZ_STATS_KICK);
  }
  
  static void nohz_newidle_balance(struct rq *this_rq)
@@ -11552,6 +11552,17 @@ static void detach_entity_cfs_rq(struct sched_entity *se)
  {
         struct cfs_rq *cfs_rq = cfs_rq_of(se);
  
+#ifdef CONFIG_SMP
+       /*
+        * In case the task sched_avg hasn't been attached:
+        * - A forked task which hasn't been woken up by wake_up_new_task().
+        * - A task which has been woken up by try_to_wake_up() but is
+        *   waiting for actually being woken up by sched_ttwu_pending().
+        */
+       if (!se->avg.last_update_time)
+               return;
+#endif
+
         /* Catch up with the cfs_rq and remove our load when we leave */
         update_load_avg(cfs_rq, se, 0);
         detach_entity_load_avg(cfs_rq, se);
@@ -11563,14 +11574,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
  {
         struct cfs_rq *cfs_rq = cfs_rq_of(se);
  
-#ifdef CONFIG_FAIR_GROUP_SCHED
-       /*
-        * Since the real-depth could have been changed (only FAIR
-        * class maintain depth value), reset depth properly.
-        */
-       se->depth = se->parent ? se->parent->depth + 1 : 0;
-#endif
-
         /* Synchronize entity with its cfs_rq */
         update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
         attach_entity_load_avg(cfs_rq, se);
@@ -11666,39 +11669,25 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
  }
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
-static void task_set_group_fair(struct task_struct *p)
+static void task_change_group_fair(struct task_struct *p)
  {
-       struct sched_entity *se = &p->se;
-
-       set_task_rq(p, task_cpu(p));
-       se->depth = se->parent ? se->parent->depth + 1 : 0;
-}
+       /*
+        * We couldn't detach or attach a forked task which
+        * hasn't been woken up by wake_up_new_task().
+        */
+       if (READ_ONCE(p->__state) == TASK_NEW)
+               return;
  
-static void task_move_group_fair(struct task_struct *p)
-{
         detach_task_cfs_rq(p);
-       set_task_rq(p, task_cpu(p));
  
  #ifdef CONFIG_SMP
         /* Tell se's cfs_rq has been changed -- migrated */
         p->se.avg.last_update_time = 0;
  #endif
+       set_task_rq(p, task_cpu(p));
         attach_task_cfs_rq(p);
  }
  
-static void task_change_group_fair(struct task_struct *p, int type)
-{
-       switch (type) {
-       case TASK_SET_GROUP:
-               task_set_group_fair(p);
-               break;
-
-       case TASK_MOVE_GROUP:
-               task_move_group_fair(p);
-               break;
-       }
-}
-
  void free_fair_sched_group(struct task_group *tg)
  {
         int i;
@@ -12075,6 +12064,13 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
  __init void init_sched_fair_class(void)
  {
  #ifdef CONFIG_SMP
+       int i;
+
+       for_each_possible_cpu(i) {
+               zalloc_cpumask_var_node(&per_cpu(load_balance_mask, i), GFP_KERNEL, cpu_to_node(i));
+               zalloc_cpumask_var_node(&per_cpu(select_rq_mask,    i), GFP_KERNEL, cpu_to_node(i));
+       }
+
         open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
  
  #ifdef CONFIG_NO_HZ_COMMON
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c

index 2936fe5..27e694c 100644 (file)
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -509,7 +509,7 @@ static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
         unsigned int cpu_cap;
  
         /* Only heterogeneous systems can benefit from this check */
-       if (!static_branch_unlikely(&sched_asym_cpucapacity))
+       if (!sched_asym_cpucap_active())
                 return true;
  
         min_cap = uclamp_eff_value(p, UCLAMP_MIN);
@@ -1062,11 +1062,7 @@ static void update_curr_rt(struct rq *rq)
  
         trace_sched_stat_runtime(curr, delta_exec, 0);
  
-       curr->se.sum_exec_runtime += delta_exec;
-       account_group_exec_runtime(curr, delta_exec);
-
-       curr->se.exec_start = now;
-       cgroup_account_cputime(curr, delta_exec);
+       update_current_exec_runtime(curr, now, delta_exec);
  
         if (!rt_bandwidth_enabled())
                 return;
@@ -1897,7 +1893,7 @@ static int find_lowest_rq(struct task_struct *task)
          * If we're on asym system ensure we consider the different capacities
          * of the CPUs when searching for the lowest_mask.
          */
-       if (static_branch_unlikely(&sched_asym_cpucapacity)) {
+       if (sched_asym_cpucap_active()) {
  
                 ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
                                           task, lowest_mask,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 7a44dce..b75ac74 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -321,21 +321,6 @@ struct dl_bw {
         u64                     total_bw;
  };
  
-/*
- * Verify the fitness of task @p to run on @cpu taking into account the
- * CPU original capacity and the runtime/deadline ratio of the task.
- *
- * The function will return true if the CPU original capacity of the
- * @cpu scaled by SCHED_CAPACITY_SCALE >= runtime/deadline ratio of the
- * task and false otherwise.
- */
-static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
-{
-       unsigned long cap = arch_scale_cpu_capacity(cpu);
-
-       return cap_scale(p->dl.dl_deadline, cap) >= p->dl.dl_runtime;
-}
-
  extern void init_dl_bw(struct dl_bw *dl_b);
  extern int  sched_dl_global_validate(void);
  extern void sched_dl_do_global(void);
@@ -1815,6 +1800,11 @@ DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
  extern struct static_key_false sched_asym_cpucapacity;
  
+static __always_inline bool sched_asym_cpucap_active(void)
+{
+       return static_branch_unlikely(&sched_asym_cpucapacity);
+}
+
  struct sched_group_capacity {
         atomic_t                ref;
         /*
@@ -1942,6 +1932,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
         set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
         p->se.cfs_rq = tg->cfs_rq[cpu];
         p->se.parent = tg->se[cpu];
+       p->se.depth = tg->se[cpu] ? tg->se[cpu]->depth + 1 : 0;
  #endif
  
  #ifdef CONFIG_RT_GROUP_SCHED
@@ -2204,11 +2195,8 @@ struct sched_class {
  
         void (*update_curr)(struct rq *rq);
  
-#define TASK_SET_GROUP         0
-#define TASK_MOVE_GROUP                1
-
  #ifdef CONFIG_FAIR_GROUP_SCHED
-       void (*task_change_group)(struct task_struct *p, int type);
+       void (*task_change_group)(struct task_struct *p);
  #endif
  };
  
@@ -2896,6 +2884,21 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
                                  enum cpu_util_type type,
                                  struct task_struct *p);
  
+/*
+ * Verify the fitness of task @p to run on @cpu taking into account the
+ * CPU original capacity and the runtime/deadline ratio of the task.
+ *
+ * The function will return true if the original capacity of @cpu is
+ * greater than or equal to task's deadline density right shifted by
+ * (BW_SHIFT - SCHED_CAPACITY_SHIFT) and false otherwise.
+ */
+static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
+{
+       unsigned long cap = arch_scale_cpu_capacity(cpu);
+
+       return cap >= p->dl.dl_density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT);
+}
+
  static inline unsigned long cpu_bw_dl(struct rq *rq)
  {
         return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
@@ -3157,4 +3160,14 @@ extern int sched_dynamic_mode(const char *str);
  extern void sched_dynamic_update(int mode);
  #endif
  
+static inline void update_current_exec_runtime(struct task_struct *curr,
+                                               u64 now, u64 delta_exec)
+{
+       curr->se.sum_exec_runtime += delta_exec;
+       account_group_exec_runtime(curr, delta_exec);
+
+       curr->se.exec_start = now;
+       cgroup_account_cputime(curr, delta_exec);
+}
+
  #endif /* _KERNEL_SCHED_SCHED_H */
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c

index d04073a..8559059 100644 (file)
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -71,20 +71,17 @@ static void yield_task_stop(struct rq *rq)
  static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
  {
         struct task_struct *curr = rq->curr;
-       u64 delta_exec;
+       u64 now, delta_exec;
  
-       delta_exec = rq_clock_task(rq) - curr->se.exec_start;
+       now = rq_clock_task(rq);
+       delta_exec = now - curr->se.exec_start;
         if (unlikely((s64)delta_exec < 0))
                 delta_exec = 0;
  
         schedstat_set(curr->stats.exec_max,
                       max(curr->stats.exec_max, delta_exec));
  
-       curr->se.sum_exec_runtime += delta_exec;
-       account_group_exec_runtime(curr, delta_exec);
-
-       curr->se.exec_start = rq_clock_task(rq);
-       cgroup_account_cputime(curr, delta_exec);
+       update_current_exec_runtime(curr, now, delta_exec);
  }
  
  /*
author	Ingo Molnar <mingo@kernel.org>
	Tue, 30 Aug 2022 08:27:33 +0000 (10:27 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Tue, 30 Aug 2022 08:28:15 +0000 (10:28 +0200)
kernel/sched/core.c		patch | blob | history
kernel/sched/core_sched.c		patch | blob | history
kernel/sched/cpudeadline.c		patch | blob | history
kernel/sched/deadline.c		patch | blob | history
kernel/sched/fair.c		patch | blob | history
kernel/sched/rt.c		patch | blob | history
kernel/sched/sched.h		patch | blob | history
kernel/sched/stop_task.c		patch | blob | history