#include <linux/errno.h>
#include <linux/nodemask.h>
#include <linux/mm_types.h>
+#include <linux/preempt.h>
#include <asm/page.h>
#include <asm/ptrace.h>
.sum_exec_runtime = 0, \
}
+#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED)
+
+#ifdef CONFIG_PREEMPT_COUNT
+#define PREEMPT_DISABLED (1 + PREEMPT_ENABLED)
+#else
+#define PREEMPT_DISABLED PREEMPT_ENABLED
+#endif
+
/*
* Disable preemption until the scheduler is running.
* Reset by start_kernel()->sched_init()->init_idle().
* We include PREEMPT_ACTIVE to avoid cond_resched() from working
* before the scheduler is active -- see should_resched().
*/
-#define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT (PREEMPT_DISABLED + PREEMPT_ACTIVE)
/**
* struct thread_group_cputimer - thread group interval timer counts
#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
+#define SD_NUMA 0x4000 /* cross-node balancing */
extern int __weak arch_sd_sibiling_asym_packing(void);
u64 last_update;
+ /* idle_balance() stats */
+ u64 max_newidle_lb_cost;
+ unsigned long next_decay_max_lb_cost;
+
#ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
struct task_struct *last_wakee;
unsigned long wakee_flips;
unsigned long wakee_flip_decay_ts;
+
+ int wake_cpu;
#endif
int on_rq;
int numa_scan_seq;
int numa_migrate_seq;
unsigned int numa_scan_period;
+ unsigned int numa_scan_period_max;
+ unsigned long numa_migrate_retry;
u64 node_stamp; /* migration stamp */
struct callback_head numa_work;
+
+ struct list_head numa_entry;
+ struct numa_group *numa_group;
+
+ /*
+ * Exponential decaying average of faults on a per-node basis.
+ * Scheduling placement decisions are made based on the these counts.
+ * The values remain static for the duration of a PTE scan
+ */
+ unsigned long *numa_faults;
+
+ /*
+ * numa_faults_buffer records faults per node during the current
+ * scan window. When the scan completes, the counts in numa_faults
+ * decay and these values are copied.
+ */
+ unsigned long *numa_faults_buffer;
+
+ int numa_preferred_nid;
#endif /* CONFIG_NUMA_BALANCING */
struct rcu_head rcu;
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
#ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
extern void set_numabalancing_state(bool enabled);
#else
-static inline void task_numa_fault(int node, int pages, bool migrated)
+static inline void task_numa_fault(int last_node, int node, int pages,
+ bool migrated)
{
}
static inline void set_numabalancing_state(bool enabled)
return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}
-static inline int need_resched(void)
-{
- return unlikely(test_thread_flag(TIF_NEED_RESCHED));
-}
-
/*
* cond_resched() and cond_resched_lock(): latency reduction via
* explicit rescheduling in places that are safe. The return
{
return task_thread_info(p)->status & TS_POLLING;
}
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
{
current_thread_info()->status |= TS_POLLING;
}
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ __current_set_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ */
+ smp_mb();
+
+ return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
{
current_thread_info()->status &= ~TS_POLLING;
- smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ */
+ smp_mb();
+
+ return unlikely(tif_need_resched());
}
#elif defined(TIF_POLLING_NRFLAG)
static inline int tsk_is_polling(struct task_struct *p)
{
return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
}
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
{
set_thread_flag(TIF_POLLING_NRFLAG);
}
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ __current_set_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ *
+ * XXX: assumes set/clear bit are identical barrier wise.
+ */
+ smp_mb__after_clear_bit();
+
+ return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
{
clear_thread_flag(TIF_POLLING_NRFLAG);
}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Polling state must be visible before we test NEED_RESCHED,
+ * paired by resched_task()
+ */
+ smp_mb__after_clear_bit();
+
+ return unlikely(tif_need_resched());
+}
+
#else
static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+ return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+ return unlikely(tif_need_resched());
+}
#endif
+static __always_inline bool need_resched(void)
+{
+ return unlikely(tif_need_resched());
+}
+
/*
* Thread group CPU time accounting.
*/