Merge tag 'sched-core-2022-08-01' of git://git.kernel.org/pub/scm/linux/kernel/git...
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index da0bf6f..5555e49 100644
@@ -873,15 +873,11 @@ static inline void hrtick_rq_init(struct rq *rq)
        ({                                                              \
                typeof(ptr) _ptr = (ptr);                               \
                typeof(mask) _mask = (mask);                            \
-               typeof(*_ptr) _old, _val = *_ptr;                       \
+               typeof(*_ptr) _val = *_ptr;                             \
                                                                        \
-               for (;;) {                                              \
-                       _old = cmpxchg(_ptr, _val, _val | _mask);       \
-                       if (_old == _val)                               \
-                               break;                                  \
-                       _val = _old;                                    \
-               }                                                       \
-       _old;                                                           \
+               do {                                                    \
+               } while (!try_cmpxchg(_ptr, &_val, _val | _mask));      \
+       _val;                                                           \
 })
 
 #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
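
The rewrite above leans on try_cmpxchg()'s calling convention: on failure it writes the value it actually found back through its second argument, so the explicit _val = _old reload of the old cmpxchg() loop disappears and the loop body can stay empty. A minimal standalone sketch of the same pattern, using GCC's __atomic builtins rather than the kernel primitives (names here are illustrative, not from the patch):

    #include <stdbool.h>

    /* Atomically OR `mask` into `*ptr` and return the previous value;
     * the same shape as the rewritten fetch_or() above. */
    static unsigned long fetch_or_sketch(unsigned long *ptr, unsigned long mask)
    {
        unsigned long val = *ptr;

        /* On failure, __atomic_compare_exchange_n() refreshes `val`
         * with the current contents of *ptr, so no manual reload. */
        do {
        } while (!__atomic_compare_exchange_n(ptr, &val, val | mask,
                                              false, __ATOMIC_SEQ_CST,
                                              __ATOMIC_RELAXED));
        return val;
    }
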
@@ -890,7 +886,7 @@ static inline void hrtick_rq_init(struct rq *rq)
  * this avoids any races wrt polling state changes and thereby avoids
  * spurious IPIs.
  */
-static bool set_nr_and_not_polling(struct task_struct *p)
+static inline bool set_nr_and_not_polling(struct task_struct *p)
 {
        struct thread_info *ti = task_thread_info(p);
        return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
@@ -905,30 +901,28 @@ static bool set_nr_and_not_polling(struct task_struct *p)
 static bool set_nr_if_polling(struct task_struct *p)
 {
        struct thread_info *ti = task_thread_info(p);
-       typeof(ti->flags) old, val = READ_ONCE(ti->flags);
+       typeof(ti->flags) val = READ_ONCE(ti->flags);
 
        for (;;) {
                if (!(val & _TIF_POLLING_NRFLAG))
                        return false;
                if (val & _TIF_NEED_RESCHED)
                        return true;
-               old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
-               if (old == val)
+               if (try_cmpxchg(&ti->flags, &val, val | _TIF_NEED_RESCHED))
                        break;
-               val = old;
        }
        return true;
 }
 
 #else
-static bool set_nr_and_not_polling(struct task_struct *p)
+static inline bool set_nr_and_not_polling(struct task_struct *p)
 {
        set_tsk_need_resched(p);
        return true;
 }
 
 #ifdef CONFIG_SMP
-static bool set_nr_if_polling(struct task_struct *p)
+static inline bool set_nr_if_polling(struct task_struct *p)
 {
        return false;
 }
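
set_nr_if_polling() keeps its for (;;) form because two early-return conditions must be re-evaluated each time try_cmpxchg() fails and refreshes val. A hedged user-space analogue of that control flow (illustrative flag names, GCC builtins):

    #include <stdbool.h>

    #define FLAG_POLLING    0x1
    #define FLAG_RESCHED    0x2

    /* Set FLAG_RESCHED only while FLAG_POLLING is still observed;
     * mirrors the loop in set_nr_if_polling() above. */
    static bool set_if_polling_sketch(unsigned long *flags)
    {
        unsigned long val = __atomic_load_n(flags, __ATOMIC_RELAXED);

        for (;;) {
            if (!(val & FLAG_POLLING))
                return false;
            if (val & FLAG_RESCHED)
                return true;
            /* On failure, val is refreshed and the checks rerun. */
            if (__atomic_compare_exchange_n(flags, &val, val | FLAG_RESCHED,
                                            false, __ATOMIC_SEQ_CST,
                                            __ATOMIC_RELAXED))
                break;
        }
        return true;
    }
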
@@ -3808,7 +3802,7 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
        return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
 }
 
-static inline bool ttwu_queue_cond(int cpu, int wake_flags)
+static inline bool ttwu_queue_cond(int cpu)
 {
        /*
         * Do not complicate things with the async wake_list while the CPU is
@@ -3824,13 +3818,21 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
        if (!cpus_share_cache(smp_processor_id(), cpu))
                return true;
 
+       if (cpu == smp_processor_id())
+               return false;
+
        /*
-        * If the task is descheduling and the only running task on the
-        * CPU then use the wakelist to offload the task activation to
-        * the soon-to-be-idle CPU as the current CPU is likely busy.
-        * nr_running is checked to avoid unnecessary task stacking.
+        * If the wakee cpu is idle, or the task is descheduling and the
+        * only running task on the CPU, then use the wakelist to offload
+        * the task activation to the idle (or soon-to-be-idle) CPU as
+        * the current CPU is likely busy. nr_running is checked to
+        * avoid unnecessary task stacking.
+        *
+        * Note that we can only get here with (wakee) p->on_rq=0,
+        * p->on_cpu can be whatever, we've done the dequeue, so
+        * the wakee has been accounted out of ->nr_running.
         */
-       if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1)
+       if (!cpu_rq(cpu)->nr_running)
                return true;
 
        return false;
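
Taken together, these hunks leave ttwu_queue_cond() with the decision order below. This is a sketch assembled only from the lines visible in this diff; checks the diff does not show are marked rather than guessed at:

    static inline bool ttwu_queue_cond(int cpu)
    {
        /* ... hotplug (CPU-active) check elided by this diff ... */

        /* Different LLC: queue remotely to avoid touching remote
         * run-queue data from this CPU. */
        if (!cpus_share_cache(smp_processor_id(), cpu))
            return true;

        /* Never queue to ourselves (previously a WARN in the caller). */
        if (cpu == smp_processor_id())
            return false;

        /* Wakee CPU idle or soon-to-be-idle: offload the activation. */
        if (!cpu_rq(cpu)->nr_running)
            return true;

        return false;
    }
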
@@ -3838,10 +3840,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
 
 static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
 {
-       if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
-               if (WARN_ON_ONCE(cpu == smp_processor_id()))
-                       return false;
-
+       if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu)) {
                sched_clock_cpu(cpu); /* Sync clocks across CPUs */
                __ttwu_queue_wakelist(p, cpu, wake_flags);
                return true;
@@ -4163,7 +4162,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         * scheduling.
         */
        if (smp_load_acquire(&p->on_cpu) &&
-           ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
+           ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
                goto unlock;
 
        /*
@@ -4753,7 +4752,8 @@ static inline void prepare_task(struct task_struct *next)
         * Claim the task as running, we do this before switching to it
         * such that any running task will have this set.
         *
-        * See the ttwu() WF_ON_CPU case and its ordering comment.
+        * See the smp_load_acquire(&p->on_cpu) case in ttwu() and
+        * its ordering comment.
         */
        WRITE_ONCE(next->on_cpu, 1);
 #endif
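
The pairing referred to here is between the scheduler's stores to on_cpu and the acquire load in try_to_wake_up(): a waker that observes on_cpu == 0 (written with smp_store_release() in finish_task()) is guaranteed to also observe everything the previous CPU did with the task beforehand. A loose user-space analogue in C11 atomics, not the kernel's primitives:

    #include <stdatomic.h>

    static _Atomic int on_cpu = 1;  /* task currently running */
    static int task_state;          /* stand-in for state the pairing protects */

    void finish_task_analogue(void)  /* previous CPU */
    {
        task_state = 42;             /* all prior work on the task */
        atomic_store_explicit(&on_cpu, 0, memory_order_release);
    }

    int waker_analogue(void)         /* ttwu() side */
    {
        if (atomic_load_explicit(&on_cpu, memory_order_acquire) == 0)
            return task_state;       /* sees 42, not a stale value */
        return -1;                   /* task still on its CPU */
    }
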
@@ -6500,8 +6500,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
                        io_wq_worker_sleeping(tsk);
        }
 
-       if (tsk_is_pi_blocked(tsk))
-               return;
+       /*
+        * spinlock and rwlock must not flush block requests.  This will
+        * deadlock if the callback attempts to acquire a lock which is
+        * already acquired.
+        */
+       SCHED_WARN_ON(current->__state & TASK_RTLOCK_WAIT);
 
        /*
         * If we are going to sleep and we have plugged IO queued,
@@ -6998,17 +7002,29 @@ out_unlock:
 EXPORT_SYMBOL(set_user_nice);
 
 /*
- * can_nice - check if a task can reduce its nice value
+ * is_nice_reduction - check if nice value is an actual reduction
+ *
+ * Similar to can_nice() but does not perform a capability check.
+ *
  * @p: task
  * @nice: nice value
  */
-int can_nice(const struct task_struct *p, const int nice)
+static bool is_nice_reduction(const struct task_struct *p, const int nice)
 {
        /* Convert nice value [19,-20] to rlimit style value [1,40]: */
        int nice_rlim = nice_to_rlimit(nice);
 
-       return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
-               capable(CAP_SYS_NICE));
+       return (nice_rlim <= task_rlimit(p, RLIMIT_NICE));
+}
+
+/*
+ * can_nice - check if a task can reduce its nice value
+ * @p: task
+ * @nice: nice value
+ */
+int can_nice(const struct task_struct *p, const int nice)
+{
+       return is_nice_reduction(p, nice) || capable(CAP_SYS_NICE);
 }
 
 #ifdef __ARCH_WANT_SYS_NICE
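
For reference, nice_to_rlimit() maps the nice range [19, -20] onto the rlimit-style range [1, 40] as MAX_NICE - nice + 1, with MAX_NICE == 19: nice 19 -> 1, nice 0 -> 20, nice -20 -> 40. A worked sketch of the arithmetic behind is_nice_reduction() (standalone reimplementation, not the kernel code):

    #include <stdbool.h>

    #define MAX_NICE 19

    static long nice_to_rlimit_sketch(long nice)
    {
        return MAX_NICE - nice + 1;  /* 19 -> 1, 0 -> 20, -20 -> 40 */
    }

    /* A nice value is an actual reduction when its rlimit-style
     * equivalent stays within RLIMIT_NICE. */
    static bool is_nice_reduction_sketch(long nice, unsigned long rlimit_nice)
    {
        return nice_to_rlimit_sketch(nice) <= (long)rlimit_nice;
    }
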
@@ -7137,12 +7153,14 @@ struct task_struct *idle_task(int cpu)
  * required to meet deadlines.
  */
 unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
-                                unsigned long max, enum cpu_util_type type,
+                                enum cpu_util_type type,
                                 struct task_struct *p)
 {
-       unsigned long dl_util, util, irq;
+       unsigned long dl_util, util, irq, max;
        struct rq *rq = cpu_rq(cpu);
 
+       max = arch_scale_cpu_capacity(cpu);
+
        if (!uclamp_is_used() &&
            type == FREQUENCY_UTIL && rt_rq_is_runnable(&rq->rt)) {
                return max;
@@ -7222,10 +7240,9 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
        return min(max, util);
 }
 
-unsigned long sched_cpu_util(int cpu, unsigned long max)
+unsigned long sched_cpu_util(int cpu)
 {
-       return effective_cpu_util(cpu, cpu_util_cfs(cpu), max,
-                                 ENERGY_UTIL, NULL);
+       return effective_cpu_util(cpu, cpu_util_cfs(cpu), ENERGY_UTIL, NULL);
 }
 #endif /* CONFIG_SMP */
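
With the max parameter gone, effective_cpu_util() derives the capacity itself from arch_scale_cpu_capacity(), so external callers of sched_cpu_util() simply drop the argument. A hypothetical caller (not from the patch) showing the adapted API:

    /* Report a CPU's effective utilization as a percentage of its
     * capacity, using the simplified sched_cpu_util() signature. */
    static unsigned long cpu_util_percent_sketch(int cpu)
    {
        unsigned long util = sched_cpu_util(cpu);
        unsigned long max = arch_scale_cpu_capacity(cpu);

        return (100 * util) / max;
    }
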
 
@@ -7287,6 +7304,69 @@ static bool check_same_owner(struct task_struct *p)
        return match;
 }
 
+/*
+ * Allow unprivileged RT tasks to decrease priority.
+ * Only issue a capable test if needed and only once to avoid an audit
+ * event on permitted non-privileged operations:
+ */
+static int user_check_sched_setscheduler(struct task_struct *p,
+                                        const struct sched_attr *attr,
+                                        int policy, int reset_on_fork)
+{
+       if (fair_policy(policy)) {
+               if (attr->sched_nice < task_nice(p) &&
+                   !is_nice_reduction(p, attr->sched_nice))
+                       goto req_priv;
+       }
+
+       if (rt_policy(policy)) {
+               unsigned long rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
+
+               /* Can't set/change the rt policy: */
+               if (policy != p->policy && !rlim_rtprio)
+                       goto req_priv;
+
+               /* Can't increase priority: */
+               if (attr->sched_priority > p->rt_priority &&
+                   attr->sched_priority > rlim_rtprio)
+                       goto req_priv;
+       }
+
+       /*
+        * Can't set/change SCHED_DEADLINE policy at all for now
+        * (safest behavior); in the future we would like to allow
+        * unprivileged DL tasks to increase their relative deadline
+        * or reduce their runtime (both ways reducing utilization)
+        */
+       if (dl_policy(policy))
+               goto req_priv;
+
+       /*
+        * Treat SCHED_IDLE as nice 20. Only allow a switch to
+        * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
+        */
+       if (task_has_idle_policy(p) && !idle_policy(policy)) {
+               if (!is_nice_reduction(p, task_nice(p)))
+                       goto req_priv;
+       }
+
+       /* Can't change other user's priorities: */
+       if (!check_same_owner(p))
+               goto req_priv;
+
+       /* Normal users shall not reset the sched_reset_on_fork flag: */
+       if (p->sched_reset_on_fork && !reset_on_fork)
+               goto req_priv;
+
+       return 0;
+
+req_priv:
+       if (!capable(CAP_SYS_NICE))
+               return -EPERM;
+
+       return 0;
+}
+
 static int __sched_setscheduler(struct task_struct *p,
                                const struct sched_attr *attr,
                                bool user, bool pi)
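
The req_priv funnel means capable(CAP_SYS_NICE) is evaluated at most once, and only when some check actually demands privilege, which avoids audit events for permitted unprivileged operations. One such permitted operation, as a hedged user-space demo (not from the patch): a SCHED_FIFO task lowering its own priority while keeping its policy, which none of the checks above escalate:

    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
        /* Assumes this task already runs SCHED_FIFO at a priority
         * above 5; lowering it does not require CAP_SYS_NICE. */
        struct sched_param sp = { .sched_priority = 5 };

        if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
            perror("sched_setscheduler");
            return 1;
        }
        printf("RT priority lowered to %d\n", sp.sched_priority);
        return 0;
    }
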
@@ -7328,58 +7408,11 @@ recheck:
            (rt_policy(policy) != (attr->sched_priority != 0)))
                return -EINVAL;
 
-       /*
-        * Allow unprivileged RT tasks to decrease priority:
-        */
-       if (user && !capable(CAP_SYS_NICE)) {
-               if (fair_policy(policy)) {
-                       if (attr->sched_nice < task_nice(p) &&
-                           !can_nice(p, attr->sched_nice))
-                               return -EPERM;
-               }
-
-               if (rt_policy(policy)) {
-                       unsigned long rlim_rtprio =
-                                       task_rlimit(p, RLIMIT_RTPRIO);
-
-                       /* Can't set/change the rt policy: */
-                       if (policy != p->policy && !rlim_rtprio)
-                               return -EPERM;
-
-                       /* Can't increase priority: */
-                       if (attr->sched_priority > p->rt_priority &&
-                           attr->sched_priority > rlim_rtprio)
-                               return -EPERM;
-               }
-
-                /*
-                 * Can't set/change SCHED_DEADLINE policy at all for now
-                 * (safest behavior); in the future we would like to allow
-                 * unprivileged DL tasks to increase their relative deadline
-                 * or reduce their runtime (both ways reducing utilization)
-                 */
-               if (dl_policy(policy))
-                       return -EPERM;
-
-               /*
-                * Treat SCHED_IDLE as nice 20. Only allow a switch to
-                * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
-                */
-               if (task_has_idle_policy(p) && !idle_policy(policy)) {
-                       if (!can_nice(p, task_nice(p)))
-                               return -EPERM;
-               }
-
-               /* Can't change other user's priorities: */
-               if (!check_same_owner(p))
-                       return -EPERM;
-
-               /* Normal users shall not reset the sched_reset_on_fork flag: */
-               if (p->sched_reset_on_fork && !reset_on_fork)
-                       return -EPERM;
-       }
-
        if (user) {
+               retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork);
+               if (retval)
+                       return retval;
+
                if (attr->sched_flags & SCHED_FLAG_SUGOV)
                        return -EINVAL;
 
@@ -9531,7 +9564,7 @@ static struct kmem_cache *task_group_cache __read_mostly;
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
-DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
+DECLARE_PER_CPU(cpumask_var_t, select_rq_mask);
 
 void __init sched_init(void)
 {
@@ -9580,7 +9613,7 @@ void __init sched_init(void)
        for_each_possible_cpu(i) {
                per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
                        cpumask_size(), GFP_KERNEL, cpu_to_node(i));
-               per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
+               per_cpu(select_rq_mask, i) = (cpumask_var_t)kzalloc_node(
                        cpumask_size(), GFP_KERNEL, cpu_to_node(i));
        }
 #endif /* CONFIG_CPUMASK_OFFSTACK */
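
The rename reflects that the mask is now generic scratch space for run-queue selection, not only for the idle-CPU search. Roughly how scheduler code uses such a per-CPU off-stack mask, sketched under scheduler-internal assumptions (pattern as in fair.c, function name hypothetical):

    /* Fetch this CPU's scratch mask and intersect a domain span with
     * the task's allowed CPUs; runs with preemption disabled. */
    static int first_allowed_cpu_sketch(struct task_struct *p,
                                        struct sched_domain *sd)
    {
        struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);

        cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
        return cpumask_first(cpus);
    }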