From f27dde8deef33c9e58027df11ceab2198601d6a6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Aug 2013 14:55:31 +0200 Subject: [PATCH] sched: Add NEED_RESCHED to the preempt_count In order to combine the preemption and need_resched test we need to fold the need_resched information into the preempt_count value. Since the NEED_RESCHED flag is set across CPUs this needs to be an atomic operation, however we very much want to avoid making preempt_count atomic, therefore we keep the existing TIF_NEED_RESCHED infrastructure in place but at 3 sites test it and fold its value into preempt_count; namely: - resched_task() when setting TIF_NEED_RESCHED on the current task - scheduler_ipi() when resched_task() sets TIF_NEED_RESCHED on a remote task it follows it up with a reschedule IPI and we can modify the cpu local preempt_count from there. - cpu_idle_loop() for when resched_task() found tsk_is_polling(). We use an inverted bitmask to indicate need_resched so that a 0 means both need_resched and !atomic. Also remove the barrier() in preempt_enable() between preempt_enable_no_resched() and preempt_check_resched() to avoid having to reload the preemption value and allow the compiler to use the flags of the previuos decrement. I couldn't come up with any sane reason for this barrier() to be there as preempt_enable_no_resched() already has a barrier() before doing the decrement. Suggested-by: Ingo Molnar Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-7a7m5qqbn5pmwnd4wko9u6da@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 47 ++++++++++++++++++++++++++++++++++++++++++----- include/linux/sched.h | 7 +++++-- kernel/cpu/idle.c | 7 +++++++ kernel/sched/core.c | 20 +++++++++++++++----- 4 files changed, 69 insertions(+), 12 deletions(-) diff --git a/include/linux/preempt.h b/include/linux/preempt.h index eaac52a..92e3418 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -10,9 +10,19 @@ #include #include +/* + * We use the MSB mostly because its available; see for + * the other bits -- can't include that header due to inclusion hell. + */ +#define PREEMPT_NEED_RESCHED 0x80000000 + +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ static __always_inline int preempt_count(void) { - return current_thread_info()->preempt_count; + return current_thread_info()->preempt_count & ~PREEMPT_NEED_RESCHED; } static __always_inline int *preempt_count_ptr(void) @@ -20,11 +30,40 @@ static __always_inline int *preempt_count_ptr(void) return ¤t_thread_info()->preempt_count; } +/* + * We now loose PREEMPT_NEED_RESCHED and cause an extra reschedule; however the + * alternative is loosing a reschedule. Better schedule too often -- also this + * should be a very rare operation. + */ static __always_inline void preempt_count_set(int pc) { *preempt_count_ptr() = pc; } +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * single instruction. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + +static __always_inline void set_preempt_need_resched(void) +{ + *preempt_count_ptr() &= ~PREEMPT_NEED_RESCHED; +} + +static __always_inline void clear_preempt_need_resched(void) +{ + *preempt_count_ptr() |= PREEMPT_NEED_RESCHED; +} + +static __always_inline bool test_preempt_need_resched(void) +{ + return !(*preempt_count_ptr() & PREEMPT_NEED_RESCHED); +} + #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void add_preempt_count(int val); extern void sub_preempt_count(int val); @@ -42,7 +81,7 @@ asmlinkage void preempt_schedule(void); #define preempt_check_resched() \ do { \ - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ + if (unlikely(!*preempt_count_ptr())) \ preempt_schedule(); \ } while (0) @@ -52,7 +91,7 @@ void preempt_schedule_context(void); #define preempt_check_resched_context() \ do { \ - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ + if (unlikely(!*preempt_count_ptr())) \ preempt_schedule_context(); \ } while (0) #else @@ -88,7 +127,6 @@ do { \ #define preempt_enable() \ do { \ preempt_enable_no_resched(); \ - barrier(); \ preempt_check_resched(); \ } while (0) @@ -116,7 +154,6 @@ do { \ #define preempt_enable_notrace() \ do { \ preempt_enable_no_resched_notrace(); \ - barrier(); \ preempt_check_resched_context(); \ } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index e783ec5..9fa151f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -22,6 +22,7 @@ struct sched_param { #include #include #include +#include #include #include @@ -434,7 +435,9 @@ struct task_cputime { * We include PREEMPT_ACTIVE to avoid cond_resched() from working * before the scheduler is active -- see should_resched(). */ -#define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE) +#define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE + PREEMPT_NEED_RESCHED) +#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED) +#define PREEMPT_DISABLED (1 + PREEMPT_NEED_RESCHED) /** * struct thread_group_cputimer - thread group interval timer counts @@ -2408,7 +2411,7 @@ static inline int signal_pending_state(long state, struct task_struct *p) static inline int need_resched(void) { - return unlikely(test_thread_flag(TIF_NEED_RESCHED)); + return unlikely(test_preempt_need_resched()); } /* diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c index c261409..988573a 100644 --- a/kernel/cpu/idle.c +++ b/kernel/cpu/idle.c @@ -105,6 +105,13 @@ static void cpu_idle_loop(void) __current_set_polling(); } arch_cpu_idle_exit(); + /* + * We need to test and propagate the TIF_NEED_RESCHED + * bit here because we might not have send the + * reschedule IPI to idle tasks. + */ + if (tif_need_resched()) + set_preempt_need_resched(); } tick_nohz_idle_exit(); schedule_preempt_disabled(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fe89afa..ee61f5a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -525,8 +525,10 @@ void resched_task(struct task_struct *p) set_tsk_need_resched(p); cpu = task_cpu(p); - if (cpu == smp_processor_id()) + if (cpu == smp_processor_id()) { + set_preempt_need_resched(); return; + } /* NEED_RESCHED must be visible before we test polling */ smp_mb(); @@ -1391,6 +1393,14 @@ static void sched_ttwu_pending(void) void scheduler_ipi(void) { + /* + * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting + * TIF_NEED_RESCHED remotely (for the first time) will also send + * this IPI. + */ + if (tif_need_resched()) + set_preempt_need_resched(); + if (llist_empty(&this_rq()->wake_list) && !tick_nohz_full_cpu(smp_processor_id()) && !got_nohz_idle_kick()) @@ -1714,7 +1724,7 @@ void sched_fork(struct task_struct *p) #endif #ifdef CONFIG_PREEMPT_COUNT /* Want to start with kernel preemption disabled. */ - task_thread_info(p)->preempt_count = 1; + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; #endif #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); @@ -2425,6 +2435,7 @@ need_resched: put_prev_task(rq, prev); next = pick_next_task(rq); clear_tsk_need_resched(prev); + clear_preempt_need_resched(); rq->skip_clock_update = 0; if (likely(prev != next)) { @@ -2536,11 +2547,10 @@ EXPORT_SYMBOL(preempt_schedule); */ asmlinkage void __sched preempt_schedule_irq(void) { - struct thread_info *ti = current_thread_info(); enum ctx_state prev_state; /* Catch callers which need to be fixed */ - BUG_ON(ti->preempt_count || !irqs_disabled()); + BUG_ON(preempt_count() || !irqs_disabled()); prev_state = exception_enter(); @@ -4207,7 +4217,7 @@ void init_idle(struct task_struct *idle, int cpu) raw_spin_unlock_irqrestore(&rq->lock, flags); /* Set the preempt count _outside_ the spinlocks! */ - task_thread_info(idle)->preempt_count = 0; + task_thread_info(idle)->preempt_count = PREEMPT_ENABLED; /* * The idle tasks have their own, simple scheduling class: -- 2.7.4