sched/preempt: Fix up missed PREEMPT_NEED_RESCHED folding
author Peter Zijlstra <peterz@infradead.org>
Wed, 20 Nov 2013 11:22:37 +0000 (12:22 +0100)
committer Ingo Molnar <mingo@kernel.org>
Mon, 13 Jan 2014 16:38:55 +0000 (17:38 +0100)
With various drivers wanting to inject idle time, we get people
calling idle routines outside of the idle loop proper.

Therefore we need to be extra careful about not missing
TIF_NEED_RESCHED -> PREEMPT_NEED_RESCHED propagations.

While looking at this, I also realized there's a small window in the
existing idle loop where we can miss TIF_NEED_RESCHED: when it gets set
right after the tif_need_resched() test at the end of the loop but
right before the need_resched() test at the start of the loop.

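So move the TIF_NEED_RESCHED -> PREEMPT_NEED_RESCHED fold out of the
loop, to the point where we're guaranteed to have TIF_NEED_RESCHED set;
there it becomes an unconditional preempt_set_need_resched(). The
conditional variant, preempt_fold_need_resched(), is used where the
flag may or may not be set.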

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-x9jgh45oeayzajz2mjt0y7d6@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/mwait.h
include/linux/preempt.h
include/linux/sched.h
kernel/cpu/idle.c
kernel/sched/core.c

index 19b71c4..1da25a5 100644 (file)
@@ -53,7 +53,7 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
                if (!need_resched())
                        __mwait(eax, ecx);
        }
-       __current_clr_polling();
+       current_clr_polling();
 }
 
 #endif /* _ASM_X86_MWAIT_H */
index dd9ddf8..59749fc 100644 (file)
@@ -134,6 +134,21 @@ do { \
 #undef preempt_check_resched
 #endif
 
+#ifdef CONFIG_PREEMPT
+#define preempt_set_need_resched() \
+do { \
+       set_preempt_need_resched(); \
+} while (0)
+#define preempt_fold_need_resched() \
+do { \
+       if (tif_need_resched()) \
+               set_preempt_need_resched(); \
+} while (0)
+#else
+#define preempt_set_need_resched() do { } while (0)
+#define preempt_fold_need_resched() do { } while (0)
+#endif
+
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 
 struct preempt_notifier;
index a038752..ffccdad 100644 (file)
@@ -2745,6 +2745,21 @@ static inline bool __must_check current_clr_polling_and_test(void)
 }
 #endif
 
+static inline void current_clr_polling(void)
+{
+       __current_clr_polling();
+
+       /*
+        * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
+        * Once the bit is cleared, we'll get IPIs with every new
+        * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
+        * fold.
+        */
+       smp_mb(); /* paired with resched_task() */
+
+       preempt_fold_need_resched();
+}
+
 static __always_inline bool need_resched(void)
 {
        return unlikely(tif_need_resched());
index 988573a..277f494 100644 (file)
@@ -105,14 +105,17 @@ static void cpu_idle_loop(void)
                                __current_set_polling();
                        }
                        arch_cpu_idle_exit();
-                       /*
-                        * We need to test and propagate the TIF_NEED_RESCHED
-                        * bit here because we might not have send the
-                        * reschedule IPI to idle tasks.
-                        */
-                       if (tif_need_resched())
-                               set_preempt_need_resched();
                }
+
+               /*
+                * Since we fell out of the loop above, we know
+                * TIF_NEED_RESCHED must be set, propagate it into
+                * PREEMPT_NEED_RESCHED.
+                *
+                * This is required because for polling idle loops we will
+                * not have had an IPI to fold the state for us.
+                */
+               preempt_set_need_resched();
                tick_nohz_idle_exit();
                schedule_preempt_disabled();
        }
index 392c6f8..0326c06 100644 (file)
@@ -1510,8 +1510,7 @@ void scheduler_ipi(void)
         * TIF_NEED_RESCHED remotely (for the first time) will also send
         * this IPI.
         */
-       if (tif_need_resched())
-               set_preempt_need_resched();
+       preempt_fold_need_resched();
 
        if (llist_empty(&this_rq()->wake_list)
                        && !tick_nohz_full_cpu(smp_processor_id())