sched/numa: Report a NUMA task group ID

[platform/adaptation/renesas_rcar/renesas_kernel.git] / include / linux / sched.h
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 078066d..b0b343b 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,6 +22,7 @@ struct sched_param {
  #include <linux/errno.h>
  #include <linux/nodemask.h>
  #include <linux/mm_types.h>
+#include <linux/preempt.h>
  
  #include <asm/page.h>
  #include <asm/ptrace.h>
@@ -107,14 +108,6 @@ extern unsigned long this_cpu_load(void);
  extern void calc_global_load(unsigned long ticks);
  extern void update_cpu_load_nohz(void);
  
-/* Notifier for when a task gets migrated to a new CPU */
-struct task_migration_notifier {
-       struct task_struct *task;
-       int from_cpu;
-       int to_cpu;
-};
-extern void register_task_migration_notifier(struct notifier_block *n);
-
  extern unsigned long get_parent_ip(unsigned long addr);
  
  extern void dump_cpu_task(int cpu);
@@ -435,6 +428,14 @@ struct task_cputime {
                 .sum_exec_runtime = 0,                          \
         }
  
+#define PREEMPT_ENABLED                (PREEMPT_NEED_RESCHED)
+
+#ifdef CONFIG_PREEMPT_COUNT
+#define PREEMPT_DISABLED       (1 + PREEMPT_ENABLED)
+#else
+#define PREEMPT_DISABLED       PREEMPT_ENABLED
+#endif
+
  /*
   * Disable preemption until the scheduler is running.
   * Reset by start_kernel()->sched_init()->init_idle().
@@ -442,7 +443,7 @@ struct task_cputime {
   * We include PREEMPT_ACTIVE to avoid cond_resched() from working
   * before the scheduler is active -- see should_resched().
   */
-#define INIT_PREEMPT_COUNT     (1 + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT     (PREEMPT_DISABLED + PREEMPT_ACTIVE)
  
  /**
   * struct thread_group_cputimer - thread group interval timer counts
@@ -776,6 +777,7 @@ enum cpu_idle_type {
  #define SD_ASYM_PACKING                0x0800  /* Place busy groups earlier in the domain */
  #define SD_PREFER_SIBLING      0x1000  /* Prefer to place tasks in a sibling domain */
  #define SD_OVERLAP             0x2000  /* sched_domains of this level overlap */
+#define SD_NUMA                        0x4000  /* cross-node balancing */
  
  extern int __weak arch_sd_sibiling_asym_packing(void);
  
@@ -819,6 +821,10 @@ struct sched_domain {
  
         u64 last_update;
  
+       /* idle_balance() stats */
+       u64 max_newidle_lb_cost;
+       unsigned long next_decay_max_lb_cost;
+
  #ifdef CONFIG_SCHEDSTATS
         /* load_balance() stats */
         unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1034,6 +1040,11 @@ struct task_struct {
  #ifdef CONFIG_SMP
         struct llist_node wake_entry;
         int on_cpu;
+       struct task_struct *last_wakee;
+       unsigned long wakee_flips;
+       unsigned long wakee_flip_decay_ts;
+
+       int wake_cpu;
  #endif
         int on_rq;
  
@@ -1331,8 +1342,29 @@ struct task_struct {
         int numa_scan_seq;
         int numa_migrate_seq;
         unsigned int numa_scan_period;
+       unsigned int numa_scan_period_max;
+       unsigned long numa_migrate_retry;
         u64 node_stamp;                 /* migration stamp  */
         struct callback_head numa_work;
+
+       struct list_head numa_entry;
+       struct numa_group *numa_group;
+
+       /*
+        * Exponential decaying average of faults on a per-node basis.
+        * Scheduling placement decisions are made based on these counts.
+        * The values remain static for the duration of a PTE scan.
+        */
+       unsigned long *numa_faults;
+
+       /*
+        * numa_faults_buffer records faults per node during the current
+        * scan window. When the scan completes, the counts in numa_faults
+        * decay and these values are copied.
+        */
+       unsigned long *numa_faults_buffer;
+
+       int numa_preferred_nid;
  #endif /* CONFIG_NUMA_BALANCING */
  
         struct rcu_head rcu;
@@ -1398,6 +1430,13 @@ struct task_struct {
                 unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
         } memcg_batch;
         unsigned int memcg_kmem_skip_account;
+       struct memcg_oom_info {
+               unsigned int may_oom:1;
+               unsigned int in_memcg_oom:1;
+               unsigned int oom_locked:1;
+               int wakeups;
+               struct mem_cgroup *wait_on_memcg;
+       } memcg_oom;
  #endif
  #ifdef CONFIG_UPROBES
         struct uprobe_task *utask;
@@ -1412,12 +1451,18 @@ struct task_struct {
  #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  
  #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
+extern pid_t task_numa_group_id(struct task_struct *p);
  extern void set_numabalancing_state(bool enabled);
  #else
-static inline void task_numa_fault(int node, int pages, bool migrated)
+static inline void task_numa_fault(int last_node, int node, int pages,
+                                  bool migrated)
  {
  }
+static inline pid_t task_numa_group_id(struct task_struct *p)
+{
+       return 0;
+}
  static inline void set_numabalancing_state(bool enabled)
  {
  }
@@ -2174,15 +2219,15 @@ static inline bool thread_group_leader(struct task_struct *p)
   * all we care about is that we have a task with the appropriate
   * pid, we don't actually care if we have the right task.
   */
-static inline int has_group_leader_pid(struct task_struct *p)
+static inline bool has_group_leader_pid(struct task_struct *p)
  {
-       return p->pid == p->tgid;
+       return task_pid(p) == p->signal->leader_pid;
  }
  
  static inline
-int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
  {
-       return p1->tgid == p2->tgid;
+       return p1->signal == p2->signal;
  }
  
  static inline struct task_struct *next_thread(const struct task_struct *p)
@@ -2400,11 +2445,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
         return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  }
  
-static inline int need_resched(void)
-{
-       return unlikely(test_thread_flag(TIF_NEED_RESCHED));
-}
-
  /*
   * cond_resched() and cond_resched_lock(): latency reduction via
   * explicit rescheduling in places that are safe. The return
@@ -2473,36 +2513,105 @@ static inline int tsk_is_polling(struct task_struct *p)
  {
         return task_thread_info(p)->status & TS_POLLING;
  }
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
  {
         current_thread_info()->status |= TS_POLLING;
  }
  
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+       __current_set_polling();
+
+       /*
+        * Polling state must be visible before we test NEED_RESCHED,
+        * paired by resched_task()
+        */
+       smp_mb();
+
+       return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
  {
         current_thread_info()->status &= ~TS_POLLING;
-       smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+       __current_clr_polling();
+
+       /*
+        * Polling state must be visible before we test NEED_RESCHED,
+        * paired by resched_task()
+        */
+       smp_mb();
+
+       return unlikely(tif_need_resched());
  }
  #elif defined(TIF_POLLING_NRFLAG)
  static inline int tsk_is_polling(struct task_struct *p)
  {
         return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
  }
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
  {
         set_thread_flag(TIF_POLLING_NRFLAG);
  }
  
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+       __current_set_polling();
+
+       /*
+        * Polling state must be visible before we test NEED_RESCHED,
+        * paired by resched_task()
+        *
+        * XXX: assumes set/clear bit are identical barrier-wise.
+        */
+       smp_mb__after_clear_bit();
+
+       return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
  {
         clear_thread_flag(TIF_POLLING_NRFLAG);
  }
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+       __current_clr_polling();
+
+       /*
+        * Polling state must be visible before we test NEED_RESCHED,
+        * paired by resched_task()
+        */
+       smp_mb__after_clear_bit();
+
+       return unlikely(tif_need_resched());
+}
+
  #else
  static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+       return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+       return unlikely(tif_need_resched());
+}
  #endif
  
+static __always_inline bool need_resched(void)
+{
+       return unlikely(tif_need_resched());
+}
+
  /*
   * Thread group CPU time accounting.
   */