sched: Fix ancient race in do_exit()

author Yasunori Goto <y-goto@jp.fujitsu.com>

Tue, 17 Jan 2012 08:40:31 +0000 (17:40 +0900)

committer Ingo Molnar <mingo@elte.hu>

Fri, 27 Jan 2012 10:55:36 +0000 (11:55 +0100)
author Yasunori Goto <y-goto@jp.fujitsu.com>
Tue, 17 Jan 2012 08:40:31 +0000 (17:40 +0900)
committer Ingo Molnar <mingo@elte.hu>
Fri, 27 Jan 2012 10:55:36 +0000 (11:55 +0100)
diff --git a/kernel/exit.c b/kernel/exit.c

index 294b170..4b4042f 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1038,6 +1038,22 @@ void do_exit(long code)
         if (tsk->nr_dirtied)
                 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
         exit_rcu();
+
+       /*
+        * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+        * when the following two conditions become true.
+        *   - There is a race on mmap_sem (which is acquired by
+        *     exit_mm()), and
+        *   - SMI occurs before setting TASK_RUNNING.
+        *     (or hypervisor of virtual machine switches to other guest)
+        *  As a result, the task may become TASK_RUNNING after becoming TASK_DEAD.
+        *
+        * To avoid it, we have to wait for tsk->pi_lock, which is held by
+        * try_to_wake_up(), to be released.
+        */
+       smp_mb();
+       raw_spin_unlock_wait(&tsk->pi_lock);
+
         /* causes final put_task_struct in finish_task_switch(). */
         tsk->state = TASK_DEAD;
         tsk->flags |= PF_NOFREEZE;      /* tell freezer to ignore us */
author	Yasunori Goto <y-goto@jp.fujitsu.com>
	Tue, 17 Jan 2012 08:40:31 +0000 (17:40 +0900)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 27 Jan 2012 10:55:36 +0000 (11:55 +0100)