sched/numa: Report a NUMA task group ID

[platform/adaptation/renesas_rcar/renesas_kernel.git] / include / linux / sched.h
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 6682da3..b0b343b 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -22,6 +22,7 @@ struct sched_param {
  #include <linux/errno.h>
  #include <linux/nodemask.h>
  #include <linux/mm_types.h>
+#include <linux/preempt.h>
  
  #include <asm/page.h>
  #include <asm/ptrace.h>
@@ -427,6 +428,14 @@ struct task_cputime {
                 .sum_exec_runtime = 0,                          \
         }
  
+#define PREEMPT_ENABLED                (PREEMPT_NEED_RESCHED)
+
+#ifdef CONFIG_PREEMPT_COUNT
+#define PREEMPT_DISABLED       (1 + PREEMPT_ENABLED)
+#else
+#define PREEMPT_DISABLED       PREEMPT_ENABLED
+#endif
+
  /*
   * Disable preemption until the scheduler is running.
   * Reset by start_kernel()->sched_init()->init_idle().
@@ -434,7 +443,7 @@ struct task_cputime {
   * We include PREEMPT_ACTIVE to avoid cond_resched() from working
   * before the scheduler is active -- see should_resched().
   */
-#define INIT_PREEMPT_COUNT     (1 + PREEMPT_ACTIVE)
+#define INIT_PREEMPT_COUNT     (PREEMPT_DISABLED + PREEMPT_ACTIVE)
  
  /**
   * struct thread_group_cputimer - thread group interval timer counts
@@ -768,6 +777,7 @@ enum cpu_idle_type {
  #define SD_ASYM_PACKING                0x0800  /* Place busy groups earlier in the domain */
  #define SD_PREFER_SIBLING      0x1000  /* Prefer to place tasks in a sibling domain */
  #define SD_OVERLAP             0x2000  /* sched_domains of this level overlap */
+#define SD_NUMA                        0x4000  /* cross-node balancing */
  
  extern int __weak arch_sd_sibiling_asym_packing(void);
  
@@ -811,6 +821,10 @@ struct sched_domain {
  
         u64 last_update;
  
+       /* idle_balance() stats */
+       u64 max_newidle_lb_cost;
+       unsigned long next_decay_max_lb_cost;
+
  #ifdef CONFIG_SCHEDSTATS
         /* load_balance() stats */
         unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1029,6 +1043,8 @@ struct task_struct {
         struct task_struct *last_wakee;
         unsigned long wakee_flips;
         unsigned long wakee_flip_decay_ts;
+
+       int wake_cpu;
  #endif
         int on_rq;
  
@@ -1326,8 +1342,29 @@ struct task_struct {
         int numa_scan_seq;
         int numa_migrate_seq;
         unsigned int numa_scan_period;
+       unsigned int numa_scan_period_max;
+       unsigned long numa_migrate_retry;
         u64 node_stamp;                 /* migration stamp  */
         struct callback_head numa_work;
+
+       struct list_head numa_entry;
+       struct numa_group *numa_group;
+
+       /*
+        * Exponentially decaying average of faults on a per-node basis.
+        * Scheduling placement decisions are made based on these counts.
+        * The values remain static for the duration of a PTE scan.
+        */
+       unsigned long *numa_faults;
+
+       /*
+        * numa_faults_buffer records faults per node during the current
+        * scan window. When the scan completes, the counts in numa_faults
+        * decay and these values are copied.
+        */
+       unsigned long *numa_faults_buffer;
+
+       int numa_preferred_nid;
  #endif /* CONFIG_NUMA_BALANCING */
  
         struct rcu_head rcu;
@@ -1414,11 +1451,17 @@ struct task_struct {
  #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  
  #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
+extern pid_t task_numa_group_id(struct task_struct *p);
  extern void set_numabalancing_state(bool enabled);
  #else
-static inline void task_numa_fault(int node, int pages, bool migrated)
+static inline void task_numa_fault(int last_node, int node, int pages,
+                                  bool migrated)
+{
+}
+static inline pid_t task_numa_group_id(struct task_struct *p)
  {
+       return 0;
  }
  static inline void set_numabalancing_state(bool enabled)
  {
@@ -2402,11 +2445,6 @@ static inline int signal_pending_state(long state, struct task_struct *p)
         return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
  }
  
-static inline int need_resched(void)
-{
-       return unlikely(test_thread_flag(TIF_NEED_RESCHED));
-}
-
  /*
   * cond_resched() and cond_resched_lock(): latency reduction via
   * explicit rescheduling in places that are safe. The return
@@ -2475,36 +2513,105 @@ static inline int tsk_is_polling(struct task_struct *p)
  {
         return task_thread_info(p)->status & TS_POLLING;
  }
-static inline void current_set_polling(void)
+static inline void __current_set_polling(void)
  {
         current_thread_info()->status |= TS_POLLING;
  }
  
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+       __current_set_polling();
+
+       /*
+        * Polling state must be visible before we test NEED_RESCHED,
+        * paired with resched_task()
+        */
+       smp_mb();
+
+       return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
  {
         current_thread_info()->status &= ~TS_POLLING;
-       smp_mb__after_clear_bit();
+}
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+       __current_clr_polling();
+
+       /*
+        * Polling state must be visible before we test NEED_RESCHED,
+        * paired with resched_task()
+        */
+       smp_mb();
+
+       return unlikely(tif_need_resched());
  }
  #elif defined(TIF_POLLING_NRFLAG)
  static inline int tsk_is_polling(struct task_struct *p)
  {
         return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
  }
-static inline void current_set_polling(void)
+
+static inline void __current_set_polling(void)
  {
         set_thread_flag(TIF_POLLING_NRFLAG);
  }
  
-static inline void current_clr_polling(void)
+static inline bool __must_check current_set_polling_and_test(void)
+{
+       __current_set_polling();
+
+       /*
+        * Polling state must be visible before we test NEED_RESCHED,
+        * paired with resched_task()
+        *
+        * XXX: assumes set and clear bit operations are identical barrier-wise.
+        */
+       smp_mb__after_clear_bit();
+
+       return unlikely(tif_need_resched());
+}
+
+static inline void __current_clr_polling(void)
  {
         clear_thread_flag(TIF_POLLING_NRFLAG);
  }
+
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+       __current_clr_polling();
+
+       /*
+        * Polling state must be visible before we test NEED_RESCHED,
+        * paired with resched_task()
+        */
+       smp_mb__after_clear_bit();
+
+       return unlikely(tif_need_resched());
+}
+
  #else
  static inline int tsk_is_polling(struct task_struct *p) { return 0; }
-static inline void current_set_polling(void) { }
-static inline void current_clr_polling(void) { }
+static inline void __current_set_polling(void) { }
+static inline void __current_clr_polling(void) { }
+
+static inline bool __must_check current_set_polling_and_test(void)
+{
+       return unlikely(tif_need_resched());
+}
+static inline bool __must_check current_clr_polling_and_test(void)
+{
+       return unlikely(tif_need_resched());
+}
  #endif
  
+static __always_inline bool need_resched(void)
+{
+       return unlikely(tif_need_resched());
+}
+
  /*
   * Thread group CPU time accounting.
   */