Merge tag 'sched-core-2022-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 1 Aug 2022 18:49:06 +0000 (11:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 1 Aug 2022 18:49:06 +0000 (11:49 -0700)
Pull scheduler updates from Ingo Molnar:
"Load-balancing improvements:

   - Improve NUMA balancing on AMD Zen systems for affine workloads.

   - Improve the handling of reduced-capacity CPUs in load-balancing.

   - Energy Model improvements: fix & refine all the energy fairness
     metrics (PELT), and remove the conservative threshold requiring 6%
     energy savings to migrate a task. Doing this improves power
     efficiency for most workloads, and also increases the reliability
     of energy-efficiency scheduling.

   - Optimize/tweak select_idle_cpu() to spend (much) less time
     searching for an idle CPU on overloaded systems. There are reports
     of several milliseconds spent there on large systems with large
     workloads ... (a rough sketch of the utilization-bounded scan idea
     follows this summary)

     [ Since the search logic changed, there might be behavioral side
       effects. ]

   - Improve NUMA imbalance behavior. On certain systems with spare
     capacity, initial placement of tasks is non-deterministic, and such
     an artificial placement imbalance can persist for a long time,
     hurting (and sometimes helping) performance.

     The fix is to make fork-time task placement consistent with runtime
     NUMA balancing placement.

     Note that some performance regressions were reported against this,
      caused by workloads that are not memory bandwidth limited, which
     benefit from the artificial locality of the placement bug(s). Mel
     Gorman's conclusion, with which we concur, was that consistency is
     better than random workload benefits from non-deterministic bugs:

        "Given there is no crystal ball and it's a tradeoff, I think
         it's better to be consistent and use similar logic at both fork
         time and runtime even if it doesn't have universal benefit."

   - Improve core scheduling by fixing a bug in
     sched_core_update_cookie() that caused unnecessary forced idling.

   - Improve wakeup-balancing by allowing same-LLC wakeup of idle CPUs
     for newly woken tasks.

   - Fix a newidle balancing bug that introduced unnecessary wakeup
     latencies.

  ABI improvements/fixes:

   - Do not check capabilities and do not issue capability check denial
     messages when a scheduler syscall doesn't require privileges. (Such
     as increasing niceness.)

   - Add forced-idle accounting to cgroups too.

   - Fix/improve the RSEQ ABI to not just silently accept unknown flags.
     (No existing tooling is known to have learned to rely on the
     previous behavior.)

   - Deprecate the (unused) RSEQ_CS_FLAG_NO_RESTART_ON_* flags.

  Optimizations:

   - Optimize & simplify leaf_cfs_rq_list()

   - Micro-optimize set_nr_{and_not,if}_polling() via try_cmpxchg().

  Misc fixes & cleanups:

   - Fix the RSEQ self-tests on RISC-V and Glibc 2.35 systems.

   - Fix a full-NOHZ bug that can in some cases result in the tick not
     being re-enabled when the last SCHED_RT task is gone from a
     runqueue but there's still SCHED_OTHER tasks around.

   - Various PREEMPT_RT related fixes.

   - Misc cleanups & smaller fixes"
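
A rough sketch of the select_idle_cpu() change referenced above (SIS_UTIL in
the shortlog below): the idle-CPU scan is bounded by how busy the LLC already
is, so overloaded systems stop sweeping every sibling. The mapping below is
purely illustrative, it is not the kernel's actual heuristic or tuning and the
function name is made up; it only shows the shape of the idea as a standalone
C program:

    #include <stdio.h>

    #define SCHED_CAPACITY_SCALE 1024   /* fixed-point "100%" of one CPU */

    /* Illustrative only: a busier LLC yields a shorter (possibly zero) scan. */
    static int nr_idle_scan(unsigned long sum_util, int llc_cpus)
    {
            unsigned long capacity = (unsigned long)llc_cpus * SCHED_CAPACITY_SCALE;
            unsigned long spare = capacity > sum_util ? capacity - sum_util : 0;

            /* Scan roughly in proportion to the capacity believed to be free. */
            return (int)(spare * llc_cpus / capacity);
    }

    int main(void)
    {
            int llc_cpus = 16;

            for (unsigned long busy = 0; busy <= 100; busy += 25)
                    printf("LLC %3lu%% busy -> scan up to %2d CPUs\n", busy,
                           nr_idle_scan(busy * llc_cpus * SCHED_CAPACITY_SCALE / 100,
                                        llc_cpus));
            return 0;
    }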

* tag 'sched-core-2022-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (32 commits)
  rseq: Kill process when unknown flags are encountered in ABI structures
  rseq: Deprecate RSEQ_CS_FLAG_NO_RESTART_ON_* flags
  sched/core: Fix the bug that task won't enqueue into core tree when update cookie
  nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt()
  sched/core: Always flush pending blk_plug
  sched/fair: fix case with reduced capacity CPU
  sched/core: Use try_cmpxchg in set_nr_{and_not,if}_polling
  sched/core: add forced idle accounting for cgroups
  sched/fair: Remove the energy margin in feec()
  sched/fair: Remove task_util from effective utilization in feec()
  sched/fair: Use the same cpumask per-PD throughout find_energy_efficient_cpu()
  sched/fair: Rename select_idle_mask to select_rq_mask
  sched, drivers: Remove max param from effective_cpu_util()/sched_cpu_util()
  sched/fair: Decay task PELT values during wakeup migration
  sched/fair: Provide u64 read for 32-bits arch helper
  sched/fair: Introduce SIS_UTIL to search idle CPU based on sum of util_avg
  sched: only perform capability check on privileged operation
  sched: Remove unused function group_first_cpu()
  sched/fair: Remove redundant word " *"
  selftests/rseq: check if libc rseq support is registered
  ...

include/linux/cgroup-defs.h
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/sched.h

@@@ -264,8 -264,7 +264,8 @@@ struct css_set 
         * List of csets participating in the on-going migration either as
         * source or destination.  Protected by cgroup_mutex.
         */
 -      struct list_head mg_preload_node;
 +      struct list_head mg_src_preload_node;
 +      struct list_head mg_dst_preload_node;
        struct list_head mg_node;
  
        /*
  
  struct cgroup_base_stat {
        struct task_cputime cputime;
+ #ifdef CONFIG_SCHED_CORE
+       u64 forceidle_sum;
+ #endif
  };
  
  /*
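
The cgroup_base_stat change above adds a per-cgroup forced-idle time sum when
core scheduling is enabled ("add forced idle accounting for cgroups" in the
shortlog). A minimal user-space sketch of consuming such a value, assuming it
is exported through the cgroup-v2 cpu.stat file; both the key name and the
path below are assumptions for illustration, not guaranteed by this diff:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* Path is an example; point it at the cgroup of interest. */
            FILE *f = fopen("/sys/fs/cgroup/mygroup/cpu.stat", "r");
            char key[64];
            unsigned long long val;

            if (!f) {
                    perror("cpu.stat");
                    return 1;
            }
            while (fscanf(f, "%63s %llu", key, &val) == 2) {
                    /* Assumed key name for the forced-idle sum. */
                    if (!strcmp(key, "core_sched.force_idle_usec"))
                            printf("forced idle: %llu usec\n", val);
            }
            fclose(f);
            return 0;
    }
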
diff --combined kernel/sched/core.c
@@@ -873,15 -873,11 +873,11 @@@ static inline void hrtick_rq_init(struc
        ({                                                              \
                typeof(ptr) _ptr = (ptr);                               \
                typeof(mask) _mask = (mask);                            \
-               typeof(*_ptr) _old, _val = *_ptr;                       \
+               typeof(*_ptr) _val = *_ptr;                             \
                                                                        \
-               for (;;) {                                              \
-                       _old = cmpxchg(_ptr, _val, _val | _mask);       \
-                       if (_old == _val)                               \
-                               break;                                  \
-                       _val = _old;                                    \
-               }                                                       \
-       _old;                                                           \
+               do {                                                    \
+               } while (!try_cmpxchg(_ptr, &_val, _val | _mask));      \
+       _val;                                                           \
  })
  
  #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
   * this avoids any races wrt polling state changes and thereby avoids
   * spurious IPIs.
   */
- static bool set_nr_and_not_polling(struct task_struct *p)
+ static inline bool set_nr_and_not_polling(struct task_struct *p)
  {
        struct thread_info *ti = task_thread_info(p);
        return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
  static bool set_nr_if_polling(struct task_struct *p)
  {
        struct thread_info *ti = task_thread_info(p);
-       typeof(ti->flags) old, val = READ_ONCE(ti->flags);
+       typeof(ti->flags) val = READ_ONCE(ti->flags);
  
        for (;;) {
                if (!(val & _TIF_POLLING_NRFLAG))
                        return false;
                if (val & _TIF_NEED_RESCHED)
                        return true;
-               old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
-               if (old == val)
+               if (try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED))
                        break;
-               val = old;
        }
        return true;
  }
  
  #else
- static bool set_nr_and_not_polling(struct task_struct *p)
+ static inline bool set_nr_and_not_polling(struct task_struct *p)
  {
        set_tsk_need_resched(p);
        return true;
  }
  
  #ifdef CONFIG_SMP
- static bool set_nr_if_polling(struct task_struct *p)
+ static inline bool set_nr_if_polling(struct task_struct *p)
  {
        return false;
  }
@@@ -3808,7 -3802,7 +3802,7 @@@ bool cpus_share_cache(int this_cpu, in
        return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
  }
  
- static inline bool ttwu_queue_cond(int cpu, int wake_flags)
+ static inline bool ttwu_queue_cond(int cpu)
  {
        /*
         * Do not complicate things with the async wake_list while the CPU is
        if (!cpus_share_cache(smp_processor_id(), cpu))
                return true;
  
+       if (cpu == smp_processor_id())
+               return false;
        /*
-        * If the task is descheduling and the only running task on the
-        * CPU then use the wakelist to offload the task activation to
-        * the soon-to-be-idle CPU as the current CPU is likely busy.
-        * nr_running is checked to avoid unnecessary task stacking.
+        * If the wakee cpu is idle, or the task is descheduling and the
+        * only running task on the CPU, then use the wakelist to offload
+        * the task activation to the idle (or soon-to-be-idle) CPU as
+        * the current CPU is likely busy. nr_running is checked to
+        * avoid unnecessary task stacking.
+        *
+        * Note that we can only get here with (wakee) p->on_rq=0,
+        * p->on_cpu can be whatever, we've done the dequeue, so
+        * the wakee has been accounted out of ->nr_running.
         */
-       if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1)
+       if (!cpu_rq(cpu)->nr_running)
                return true;
  
        return false;
  
  static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
  {
-       if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
-               if (WARN_ON_ONCE(cpu == smp_processor_id()))
-                       return false;
+       if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu)) {
                sched_clock_cpu(cpu); /* Sync clocks across CPUs */
                __ttwu_queue_wakelist(p, cpu, wake_flags);
                return true;
@@@ -4163,7 -4162,7 +4162,7 @@@ try_to_wake_up(struct task_struct *p, u
         * scheduling.
         */
        if (smp_load_acquire(&p->on_cpu) &&
-           ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
+           ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
                goto unlock;
  
        /*
@@@ -4753,7 -4752,8 +4752,8 @@@ static inline void prepare_task(struct 
         * Claim the task as running, we do this before switching to it
         * such that any running task will have this set.
         *
-        * See the ttwu() WF_ON_CPU case and its ordering comment.
+        * See the smp_load_acquire(&p->on_cpu) case in ttwu() and
+        * its ordering comment.
         */
        WRITE_ONCE(next->on_cpu, 1);
  #endif
@@@ -4798,55 -4798,25 +4798,55 @@@ static void do_balance_callbacks(struc
  
  static void balance_push(struct rq *rq);
  
 +/*
 + * balance_push_callback is a right abuse of the callback interface and plays
 + * by significantly different rules.
 + *
 + * Where the normal balance_callback's purpose is to be ran in the same context
 + * that queued it (only later, when it's safe to drop rq->lock again),
 + * balance_push_callback is specifically targeted at __schedule().
 + *
 + * This abuse is tolerated because it places all the unlikely/odd cases behind
 + * a single test, namely: rq->balance_callback == NULL.
 + */
  struct callback_head balance_push_callback = {
        .next = NULL,
        .func = (void (*)(struct callback_head *))balance_push,
  };
  
 -static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
 +static inline struct callback_head *
 +__splice_balance_callbacks(struct rq *rq, bool split)
  {
        struct callback_head *head = rq->balance_callback;
  
 +      if (likely(!head))
 +              return NULL;
 +
        lockdep_assert_rq_held(rq);
 -      if (head)
 +      /*
 +       * Must not take balance_push_callback off the list when
 +       * splice_balance_callbacks() and balance_callbacks() are not
 +       * in the same rq->lock section.
 +       *
 +       * In that case it would be possible for __schedule() to interleave
 +       * and observe the list empty.
 +       */
 +      if (split && head == &balance_push_callback)
 +              head = NULL;
 +      else
                rq->balance_callback = NULL;
  
        return head;
  }
  
 +static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
 +{
 +      return __splice_balance_callbacks(rq, true);
 +}
 +
  static void __balance_callbacks(struct rq *rq)
  {
 -      do_balance_callbacks(rq, splice_balance_callbacks(rq));
 +      do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
  }
  
  static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
@@@ -6500,8 -6470,12 +6500,12 @@@ static inline void sched_submit_work(st
                        io_wq_worker_sleeping(tsk);
        }
  
-       if (tsk_is_pi_blocked(tsk))
-               return;
+       /*
+        * spinlock and rwlock must not flush block requests.  This will
+        * deadlock if the callback attempts to acquire a lock which is
+        * already acquired.
+        */
+       SCHED_WARN_ON(current->__state & TASK_RTLOCK_WAIT);
  
        /*
         * If we are going to sleep and we have plugged IO queued,
@@@ -6998,17 -6972,29 +7002,29 @@@ out_unlock
  EXPORT_SYMBOL(set_user_nice);
  
  /*
-  * can_nice - check if a task can reduce its nice value
+  * is_nice_reduction - check if nice value is an actual reduction
+  *
+  * Similar to can_nice() but does not perform a capability check.
+  *
   * @p: task
   * @nice: nice value
   */
- int can_nice(const struct task_struct *p, const int nice)
+ static bool is_nice_reduction(const struct task_struct *p, const int nice)
  {
        /* Convert nice value [19,-20] to rlimit style value [1,40]: */
        int nice_rlim = nice_to_rlimit(nice);
  
-       return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
-               capable(CAP_SYS_NICE));
+       return (nice_rlim <= task_rlimit(p, RLIMIT_NICE));
+ }
+ /*
+  * can_nice - check if a task can reduce its nice value
+  * @p: task
+  * @nice: nice value
+  */
+ int can_nice(const struct task_struct *p, const int nice)
+ {
+       return is_nice_reduction(p, nice) || capable(CAP_SYS_NICE);
  }
  
  #ifdef __ARCH_WANT_SYS_NICE
@@@ -7137,12 -7123,14 +7153,14 @@@ struct task_struct *idle_task(int cpu
   * required to meet deadlines.
   */
  unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
-                                unsigned long max, enum cpu_util_type type,
+                                enum cpu_util_type type,
                                 struct task_struct *p)
  {
-       unsigned long dl_util, util, irq;
+       unsigned long dl_util, util, irq, max;
        struct rq *rq = cpu_rq(cpu);
  
+       max = arch_scale_cpu_capacity(cpu);
        if (!uclamp_is_used() &&
            type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) {
                return max;
        return min(max, util);
  }
  
- unsigned long sched_cpu_util(int cpu, unsigned long max)
+ unsigned long sched_cpu_util(int cpu)
  {
-       return effective_cpu_util(cpu, cpu_util_cfs(cpu), max,
-                                 ENERGY_UTIL, NULL);
+       return effective_cpu_util(cpu, cpu_util_cfs(cpu), ENERGY_UTIL, NULL);
  }
  #endif /* CONFIG_SMP */
  
@@@ -7287,6 -7274,69 +7304,69 @@@ static bool check_same_owner(struct tas
        return match;
  }
  
+ /*
+  * Allow unprivileged RT tasks to decrease priority.
+  * Only issue a capable test if needed and only once to avoid an audit
+  * event on permitted non-privileged operations:
+  */
+ static int user_check_sched_setscheduler(struct task_struct *p,
+                                        const struct sched_attr *attr,
+                                        int policy, int reset_on_fork)
+ {
+       if (fair_policy(policy)) {
+               if (attr->sched_nice < task_nice(p) &&
+                   !is_nice_reduction(p, attr->sched_nice))
+                       goto req_priv;
+       }
+       if (rt_policy(policy)) {
+               unsigned long rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
+               /* Can't set/change the rt policy: */
+               if (policy != p->policy && !rlim_rtprio)
+                       goto req_priv;
+               /* Can't increase priority: */
+               if (attr->sched_priority > p->rt_priority &&
+                   attr->sched_priority > rlim_rtprio)
+                       goto req_priv;
+       }
+       /*
+        * Can't set/change SCHED_DEADLINE policy at all for now
+        * (safest behavior); in the future we would like to allow
+        * unprivileged DL tasks to increase their relative deadline
+        * or reduce their runtime (both ways reducing utilization)
+        */
+       if (dl_policy(policy))
+               goto req_priv;
+       /*
+        * Treat SCHED_IDLE as nice 20. Only allow a switch to
+        * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
+        */
+       if (task_has_idle_policy(p) && !idle_policy(policy)) {
+               if (!is_nice_reduction(p, task_nice(p)))
+                       goto req_priv;
+       }
+       /* Can't change other user's priorities: */
+       if (!check_same_owner(p))
+               goto req_priv;
+       /* Normal users shall not reset the sched_reset_on_fork flag: */
+       if (p->sched_reset_on_fork && !reset_on_fork)
+               goto req_priv;
+       return 0;
+ req_priv:
+       if (!capable(CAP_SYS_NICE))
+               return -EPERM;
+       return 0;
+ }
  static int __sched_setscheduler(struct task_struct *p,
                                const struct sched_attr *attr,
                                bool user, bool pi)
@@@ -7328,58 -7378,11 +7408,11 @@@ recheck
            (rt_policy(policy) != (attr->sched_priority != 0)))
                return -EINVAL;
  
-       /*
-        * Allow unprivileged RT tasks to decrease priority:
-        */
-       if (user && !capable(CAP_SYS_NICE)) {
-               if (fair_policy(policy)) {
-                       if (attr->sched_nice < task_nice(p) &&
-                           !can_nice(p, attr->sched_nice))
-                               return -EPERM;
-               }
-               if (rt_policy(policy)) {
-                       unsigned long rlim_rtprio =
-                                       task_rlimit(p, RLIMIT_RTPRIO);
-                       /* Can't set/change the rt policy: */
-                       if (policy != p->policy && !rlim_rtprio)
-                               return -EPERM;
-                       /* Can't increase priority: */
-                       if (attr->sched_priority > p->rt_priority &&
-                           attr->sched_priority > rlim_rtprio)
-                               return -EPERM;
-               }
-                /*
-                 * Can't set/change SCHED_DEADLINE policy at all for now
-                 * (safest behavior); in the future we would like to allow
-                 * unprivileged DL tasks to increase their relative deadline
-                 * or reduce their runtime (both ways reducing utilization)
-                 */
-               if (dl_policy(policy))
-                       return -EPERM;
-               /*
-                * Treat SCHED_IDLE as nice 20. Only allow a switch to
-                * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
-                */
-               if (task_has_idle_policy(p) && !idle_policy(policy)) {
-                       if (!can_nice(p, task_nice(p)))
-                               return -EPERM;
-               }
-               /* Can't change other user's priorities: */
-               if (!check_same_owner(p))
-                       return -EPERM;
-               /* Normal users shall not reset the sched_reset_on_fork flag: */
-               if (p->sched_reset_on_fork && !reset_on_fork)
-                       return -EPERM;
-       }
        if (user) {
+               retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork);
+               if (retval)
+                       return retval;
                if (attr->sched_flags & SCHED_FLAG_SUGOV)
                        return -EINVAL;
  
@@@ -9531,7 -9534,7 +9564,7 @@@ static struct kmem_cache *task_group_ca
  #endif
  
  DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
- DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
+ DECLARE_PER_CPU(cpumask_var_t, select_rq_mask);
  
  void __init sched_init(void)
  {
        for_each_possible_cpu(i) {
                per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
                        cpumask_size(), GFP_KERNEL, cpu_to_node(i));
-               per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
+               per_cpu(select_rq_mask, i) = (cpumask_var_t)kzalloc_node(
                        cpumask_size(), GFP_KERNEL, cpu_to_node(i));
        }
  #endif /* CONFIG_CPUMASK_OFFSTACK */
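
The fetch_or() and set_nr_{and_not,if}_polling() hunks above replace
open-coded cmpxchg() retry loops with try_cmpxchg(), whose key property is
that a failed exchange writes the current value back into the "expected"
variable, so the loop needs no manual reload. A minimal user-space sketch of
the same transformation using the GCC/Clang __atomic builtins (the function
names here are illustrative, not kernel API):

    #include <stdio.h>

    static unsigned long flags;     /* stand-in for ti->flags */

    /* Old pattern: loop around cmpxchg(), reloading the old value by hand. */
    static unsigned long fetch_or_old(unsigned long *ptr, unsigned long mask)
    {
            unsigned long old, val = __atomic_load_n(ptr, __ATOMIC_RELAXED);

            for (;;) {
                    old = val;
                    if (__atomic_compare_exchange_n(ptr, &old, val | mask, 0,
                                                    __ATOMIC_SEQ_CST,
                                                    __ATOMIC_RELAXED))
                            break;
                    val = old;  /* CAS failed: "old" now holds the fresh value */
            }
            return val;
    }

    /* New pattern: a failed CAS already refreshed "val", so the body is empty. */
    static unsigned long fetch_or_new(unsigned long *ptr, unsigned long mask)
    {
            unsigned long val = __atomic_load_n(ptr, __ATOMIC_RELAXED);

            do {
            } while (!__atomic_compare_exchange_n(ptr, &val, val | mask, 0,
                                                  __ATOMIC_SEQ_CST,
                                                  __ATOMIC_RELAXED));
            return val;
    }

    int main(void)
    {
            unsigned long ret;

            flags = 0x1;
            ret = fetch_or_old(&flags, 0x4);
            printf("old: returned %#lx, flags now %#lx\n", ret, flags);
            ret = fetch_or_new(&flags, 0x8);
            printf("new: returned %#lx, flags now %#lx\n", ret, flags);
            return 0;
    }
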
diff --combined kernel/sched/deadline.c
@@@ -30,14 -30,16 +30,16 @@@ static struct ctl_table sched_dl_sysctl
                .data           = &sysctl_sched_dl_period_max,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_douintvec_minmax,
+               .extra1         = (void *)&sysctl_sched_dl_period_min,
        },
        {
                .procname       = "sched_deadline_period_min_us",
                .data           = &sysctl_sched_dl_period_min,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_douintvec_minmax,
+               .extra2         = (void *)&sysctl_sched_dl_period_max,
        },
        {}
  };
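
The sysctl hunk above switches sched_deadline_period_{min,max}_us to
proc_douintvec_minmax() and couples the two entries through extra1/extra2, so
a write that would invert the range (max below the current min, or min above
the current max) is rejected. A small user-space analogue of that rule, with
hypothetical variable names and defaults, just to show the invariant being
enforced:

    #include <errno.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the two coupled sysctls (microseconds). */
    static unsigned int dl_period_min_us = 100;
    static unsigned int dl_period_max_us = 1U << 22;

    /* Like extra1 on the max entry: max may never drop below the current min. */
    static int set_period_max(unsigned int val)
    {
            if (val < dl_period_min_us)
                    return -EINVAL;
            dl_period_max_us = val;
            return 0;
    }

    /* Like extra2 on the min entry: min may never exceed the current max. */
    static int set_period_min(unsigned int val)
    {
            if (val > dl_period_max_us)
                    return -EINVAL;
            dl_period_min_us = val;
            return 0;
    }

    int main(void)
    {
            printf("max=50:  %d (rejected, below min)\n", set_period_max(50));
            printf("min=200: %d (accepted)\n", set_period_min(200));
            printf("range: [%u, %u] us\n", dl_period_min_us, dl_period_max_us);
            return 0;
    }
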
@@@ -1701,10 -1703,7 +1703,10 @@@ static void enqueue_task_dl(struct rq *
                 * the throttle.
                 */
                p->dl.dl_throttled = 0;
 -              BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
 +              if (!(flags & ENQUEUE_REPLENISH))
 +                      printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
 +                                           task_pid_nr(p));
 +
                return;
        }
  
diff --combined kernel/sched/sched.h
@@@ -520,6 -520,45 +520,45 @@@ struct cfs_bandwidth { }
  
  #endif        /* CONFIG_CGROUP_SCHED */
  
+ /*
+  * u64_u32_load/u64_u32_store
+  *
+  * Use a copy of a u64 value to protect against data race. This is only
+  * applicable for 32-bits architectures.
+  */
+ #ifdef CONFIG_64BIT
+ # define u64_u32_load_copy(var, copy)       var
+ # define u64_u32_store_copy(var, copy, val) (var = val)
+ #else
+ # define u64_u32_load_copy(var, copy)                                 \
+ ({                                                                    \
+       u64 __val, __val_copy;                                          \
+       do {                                                            \
+               __val_copy = copy;                                      \
+               /*                                                      \
+                * paired with u64_u32_store_copy(), ordering access    \
+                * to var and copy.                                     \
+                */                                                     \
+               smp_rmb();                                              \
+               __val = var;                                            \
+       } while (__val != __val_copy);                                  \
+       __val;                                                          \
+ })
+ # define u64_u32_store_copy(var, copy, val)                           \
+ do {                                                                  \
+       typeof(val) __val = (val);                                      \
+       var = __val;                                                    \
+       /*                                                              \
+        * paired with u64_u32_load_copy(), ordering access to var and  \
+        * copy.                                                        \
+        */                                                             \
+       smp_wmb();                                                      \
+       copy = __val;                                                   \
+ } while (0)
+ #endif
+ # define u64_u32_load(var)      u64_u32_load_copy(var, var##_copy)
+ # define u64_u32_store(var, val) u64_u32_store_copy(var, var##_copy, val)
  /* CFS-related fields in a runqueue */
  struct cfs_rq {
        struct load_weight      load;
         */
        struct sched_avg        avg;
  #ifndef CONFIG_64BIT
-       u64                     load_last_update_time_copy;
+       u64                     last_update_time_copy;
  #endif
        struct {
                raw_spinlock_t  lock ____cacheline_aligned;
        int                     runtime_enabled;
        s64                     runtime_remaining;
  
+       u64                     throttled_pelt_idle;
+ #ifndef CONFIG_64BIT
+       u64                     throttled_pelt_idle_copy;
+ #endif
        u64                     throttled_clock;
        u64                     throttled_clock_pelt;
        u64                     throttled_clock_pelt_time;
@@@ -981,6 -1024,12 +1024,12 @@@ struct rq 
        u64                     clock_task ____cacheline_aligned;
        u64                     clock_pelt;
        unsigned long           lost_idle_time;
+       u64                     clock_pelt_idle;
+       u64                     clock_idle;
+ #ifndef CONFIG_64BIT
+       u64                     clock_pelt_idle_copy;
+       u64                     clock_idle_copy;
+ #endif
  
        atomic_t                nr_iowait;
  
@@@ -1693,11 -1742,6 +1742,11 @@@ queue_balance_callback(struct rq *rq
  {
        lockdep_assert_rq_held(rq);
  
 +      /*
 +       * Don't (re)queue an already queued item; nor queue anything when
 +       * balance_push() is active, see the comment with
 +       * balance_push_callback.
 +       */
        if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
                return;
  
@@@ -1815,15 -1859,6 +1864,6 @@@ static inline struct cpumask *group_bal
        return to_cpumask(sg->sgc->cpumask);
  }
  
- /**
-  * group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
-  * @group: The group whose first CPU is to be returned.
-  */
- static inline unsigned int group_first_cpu(struct sched_group *group)
- {
-       return cpumask_first(sched_group_span(group));
- }
  extern int group_balance_cpu(struct sched_group *sg);
  
  #ifdef CONFIG_SCHED_DEBUG
@@@ -2044,7 -2079,6 +2084,6 @@@ static inline int task_on_rq_migrating(
  
  #define WF_SYNC     0x10 /* Waker goes to sleep after wakeup */
  #define WF_MIGRATED 0x20 /* Internal use, task got migrated */
- #define WF_ON_CPU   0x40 /* Wakee is on_cpu */
  
  #ifdef CONFIG_SMP
  static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@@ -2852,7 -2886,7 +2891,7 @@@ enum cpu_util_type 
  };
  
  unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
-                                unsigned long max, enum cpu_util_type type,
+                                enum cpu_util_type type,
                                 struct task_struct *p);
  
  static inline unsigned long cpu_bw_dl(struct rq *rq)
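
The u64_u32_load_copy()/u64_u32_store_copy() helpers added above protect
64-bit PELT timestamps against torn reads on 32-bit architectures: the writer
updates the value, then (after a write barrier) a paired copy; the reader
reads the copy first, then the value, and retries until the two match. A
user-space sketch of that retry scheme, mirroring the kernel idiom with plain
accesses plus fences rather than strict C11 atomics (names and the demo
workload are illustrative):

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    /* The protected 64-bit value and its paired copy (as in struct cfs_rq). */
    static uint64_t var, var_copy;

    static void store_u64(uint64_t val)
    {
            var = val;
            __atomic_thread_fence(__ATOMIC_RELEASE);    /* stands in for smp_wmb() */
            var_copy = val;
    }

    static uint64_t load_u64(void)
    {
            uint64_t v, c;

            do {
                    c = var_copy;
                    __atomic_thread_fence(__ATOMIC_ACQUIRE);    /* smp_rmb() */
                    v = var;
            } while (v != c);   /* mismatch: the writer was mid-update, retry */

            return v;
    }

    static void *writer(void *arg)
    {
            (void)arg;
            for (uint64_t i = 1; i <= 1000000; i++)
                    store_u64(i << 32 | i);     /* both halves must always agree */
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, writer, NULL);
            for (int i = 0; i < 1000000; i++) {
                    uint64_t v = load_u64();
                    if ((v >> 32) != (v & 0xffffffffu)) {
                            printf("inconsistent read: %llx\n",
                                   (unsigned long long)v);
                            return 1;
                    }
            }
            pthread_join(t, NULL);
            printf("no inconsistent reads observed\n");
            return 0;
    }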