sched/core: Fix ttwu() race
author     Peter Zijlstra <peterz@infradead.org>
           Mon, 22 Jun 2020 10:01:23 +0000 (12:01 +0200)
committer  Borislav Petkov <bp@suse.de>
           Sun, 28 Jun 2020 15:01:20 +0000 (17:01 +0200)

Paul reported rcutorture occasionally hitting a NULL deref:

  sched_ttwu_pending()
    ttwu_do_wakeup()
      check_preempt_curr() := check_preempt_wakeup()
        find_matching_se()
          is_same_group()
            if (se->cfs_rq == pse->cfs_rq) <-- *BOOM*

Debugging showed that this only appears to happen when we take the new
code-path from commit:

  2ebb17717550 ("sched/core: Offload wakee task activation if it the wakee is descheduling")

and only when @cpu == smp_processor_id(), something which should not
be possible, because p->on_cpu can only be true for remote tasks.
Similarly, without the new code-path from commit:

  c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu")

this would've unconditionally hit:

  smp_cond_load_acquire(&p->on_cpu, !VAL);

and if 'cpu == smp_processor_id() && p->on_cpu' were possible, this
would result in an instant live-lock (with IRQs disabled), something
that hasn't been reported.
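
For reference, that spin pairs roughly like this (condensed; the
release store lives in finish_task()), which is why a local
'cpu == smp_processor_id() && p->on_cpu' could never make progress:

  // waker, in try_to_wake_up(), p->pi_lock held, IRQs disabled
  smp_cond_load_acquire(&p->on_cpu, !VAL);  // spin until p->on_cpu == 0

  // only the CPU currently running p clears it, from finish_task():
  smp_store_release(&prev->on_cpu, 0);

The waker would be waiting for a store that can only be issued by the
very CPU doing the waiting.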

The NULL deref can be explained, however, if the task_cpu(p) load at
the beginning of try_to_wake_up() returns an old value, and this old
value happens to be smp_processor_id(). Further assume that the
p->on_cpu load accurately returns 1: the task really is still running,
just not here.

Then, when we enqueue the task locally, we can crash in exactly the
observed manner: p->se.cfs_rq != rq->cfs_rq because p's cfs_rq is from
the wrong CPU, so we iterate into non-existent parents and NULL deref.
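
For context, the CONFIG_FAIR_GROUP_SCHED helpers involved look roughly
like this (paraphrased and trimmed from kernel/sched/fair.c); when se
and pse come from different CPUs' hierarchies they never share a
cfs_rq, so the walk runs off the top and the next load faults:

  static inline struct sched_entity *parent_entity(struct sched_entity *se)
  {
          return se->parent;  /* NULL for entities in the root cfs_rq */
  }

  static inline struct cfs_rq *
  is_same_group(struct sched_entity *se, struct sched_entity *pse)
  {
          if (se->cfs_rq == pse->cfs_rq)  /* <-- the faulting load */
                  return se->cfs_rq;
          return NULL;
  }

  static void find_matching_se(struct sched_entity **se, struct sched_entity **pse)
  {
          /* ... walk both entities up to equal depth ... */
          while (!is_same_group(*se, *pse)) {
                  *se = parent_entity(*se);   /* eventually NULL */
                  *pse = parent_entity(*pse);
          }
  }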

The closest semi-plausible scenario I've managed to contrive is
somewhat elaborate (then again, actual reproduction takes many CPU
hours of rcutorture, so it can't be anything obvious):

X->cpu = 1
rq(1)->curr = X

CPU0                            CPU1                            CPU2

                                // switch away from X
                                LOCK rq(1)->lock
                                smp_mb__after_spinlock
                                dequeue_task(X)
                                  X->on_rq = 0
                                switch_to(Z)
                                  X->on_cpu = 0
                                UNLOCK rq(1)->lock

                                                                // migrate X to cpu 0
                                                                LOCK rq(1)->lock
                                                                dequeue_task(X)
                                                                set_task_cpu(X, 0)
                                                                  X->cpu = 0
                                                                UNLOCK rq(1)->lock

                                                                LOCK rq(0)->lock
                                                                enqueue_task(X)
                                                                  X->on_rq = 1
                                                                UNLOCK rq(0)->lock

// switch to X
LOCK rq(0)->lock
smp_mb__after_spinlock
switch_to(X)
  X->on_cpu = 1
UNLOCK rq(0)->lock

// X goes sleep
X->state = TASK_UNINTERRUPTIBLE
smp_mb();                       // wake X
                                ttwu()
                                  LOCK X->pi_lock
                                  smp_mb__after_spinlock

                                  if (p->state)

                                  cpu = X->cpu; // =? 1

                                  smp_rmb()

// X calls schedule()
LOCK rq(0)->lock
smp_mb__after_spinlock
dequeue_task(X)
  X->on_rq = 0

                                  if (p->on_rq)

                                  smp_rmb();

                                  if (p->on_cpu && ttwu_queue_wakelist(..)) [*]

                                  smp_cond_load_acquire(&p->on_cpu, !VAL)

                                  cpu = select_task_rq(X, X->wake_cpu, ...)
                                  if (X->cpu != cpu)
switch_to(Y)
  X->on_cpu = 0
UNLOCK rq(0)->lock

However I'm having trouble convincing myself that's actually possible
on x86_64 -- after all, every LOCK implies an smp_mb() there, so if ttwu
observes ->state != RUNNING, it must also observe ->cpu != 1.

(Most of the previous ttwu() races were found on very large PowerPC
machines.)

Nevertheless, this fully explains the observed failure case.

Fix it by ordering the task_cpu(p) load after the p->on_cpu load,
which is easy since nothing actually uses @cpu before this.
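
Condensed, the try_to_wake_up() change (see the full hunk below) is:

  // before: @cpu loaded early in the function, possibly stale when used
  cpu = task_cpu(p);
  ...
  if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ))
          goto unlock;

  // after: only read task_cpu(p) once p->on_cpu has been observed, with
  // acquire ordering so it pairs with the scheduler's
  // "STORE p->cpu; ...; STORE p->on_cpu = 1" sequence
  if (smp_load_acquire(&p->on_cpu) &&
      ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_RQ))
          goto unlock;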

Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu")
Reported-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20200622125649.GC576871@hirez.programming.kicks-ass.net
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c1ba2e5..60791b9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2293,8 +2293,15 @@ void sched_ttwu_pending(void *arg)
        rq_lock_irqsave(rq, &rf);
        update_rq_clock(rq);
 
-       llist_for_each_entry_safe(p, t, llist, wake_entry)
+       llist_for_each_entry_safe(p, t, llist, wake_entry) {
+               if (WARN_ON_ONCE(p->on_cpu))
+                       smp_cond_load_acquire(&p->on_cpu, !VAL);
+
+               if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq)))
+                       set_task_cpu(p, cpu_of(rq));
+
                ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
+       }
 
        rq_unlock_irqrestore(rq, &rf);
 }
@@ -2378,6 +2385,9 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
 static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
 {
        if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
+               if (WARN_ON_ONCE(cpu == smp_processor_id()))
+                       return false;
+
                sched_clock_cpu(cpu); /* Sync clocks across CPUs */
                __ttwu_queue_wakelist(p, cpu, wake_flags);
                return true;
@@ -2528,7 +2538,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                        goto out;
 
                success = 1;
-               cpu = task_cpu(p);
                trace_sched_waking(p);
                p->state = TASK_RUNNING;
                trace_sched_wakeup(p);
@@ -2550,7 +2559,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 
        /* We're going to change ->state: */
        success = 1;
-       cpu = task_cpu(p);
 
        /*
         * Ensure we load p->on_rq _after_ p->state, otherwise it would
@@ -2614,8 +2622,21 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         * which potentially sends an IPI instead of spinning on p->on_cpu to
         * let the waker make forward progress. This is safe because IRQs are
         * disabled and the IPI will deliver after on_cpu is cleared.
+        *
+        * Ensure we load task_cpu(p) after p->on_cpu:
+        *
+        * set_task_cpu(p, cpu);
+        *   STORE p->cpu = @cpu
+        * __schedule() (switch to task 'p')
+        *   LOCK rq->lock
+        *   smp_mb__after_spin_lock()          smp_cond_load_acquire(&p->on_cpu)
+        *   STORE p->on_cpu = 1                LOAD p->cpu
+        *
+        * to ensure we observe the correct CPU on which the task is currently
+        * scheduling.
         */
-       if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ))
+       if (smp_load_acquire(&p->on_cpu) &&
+           ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_RQ))
                goto unlock;
 
        /*
@@ -2635,6 +2656,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
                psi_ttwu_dequeue(p);
                set_task_cpu(p, cpu);
        }
+#else
+       cpu = task_cpu(p);
 #endif /* CONFIG_SMP */
 
        ttwu_queue(p, cpu, wake_flags);
@@ -2642,7 +2665,7 @@ unlock:
        raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 out:
        if (success)
-               ttwu_stat(p, cpu, wake_flags);
+               ttwu_stat(p, task_cpu(p), wake_flags);
        preempt_enable();
 
        return success;