sched: add tracepoints related to NUMA task migration

[platform/adaptation/renesas_rcar/renesas_kernel.git] / kernel / sched / fair.c
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index c7395d9..867b0a4 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -872,15 +872,6 @@ static unsigned int task_scan_max(struct task_struct *p)
         return max(smin, smax);
  }
  
-/*
- * Once a preferred node is selected the scheduler balancer will prefer moving
- * a task to that node for sysctl_numa_balancing_settle_count number of PTE
- * scans. This will give the process the chance to accumulate more faults on
- * the preferred node but still allow the scheduler to move the task again if
- * the nodes CPUs are overloaded.
- */
-unsigned int sysctl_numa_balancing_settle_count __read_mostly = 4;
-
  static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
  {
         rq->nr_numa_running += (p->numa_preferred_nid != -1);
@@ -930,7 +921,8 @@ static inline unsigned long group_faults(struct task_struct *p, int nid)
         if (!p->numa_group)
                 return 0;
  
-       return p->numa_group->faults[2*nid] + p->numa_group->faults[2*nid+1];
+       return p->numa_group->faults[task_faults_idx(nid, 0)] +
+               p->numa_group->faults[task_faults_idx(nid, 1)];
  }
  
  /*
@@ -1023,7 +1015,7 @@ struct task_numa_env {
  
         struct numa_stats src_stats, dst_stats;
  
-       int imbalance_pct, idx;
+       int imbalance_pct;
  
         struct task_struct *best_task;
         long best_imp;
@@ -1211,7 +1203,7 @@ static int task_numa_migrate(struct task_struct *p)
          * elsewhere, so there is no point in (re)trying.
          */
         if (unlikely(!sd)) {
-               p->numa_preferred_nid = cpu_to_node(task_cpu(p));
+               p->numa_preferred_nid = task_node(p);
                 return -EINVAL;
         }
  
@@ -1258,11 +1250,15 @@ static int task_numa_migrate(struct task_struct *p)
         p->numa_scan_period = task_scan_min(p);
  
         if (env.best_task == NULL) {
-               int ret = migrate_task_to(p, env.best_cpu);
+               ret = migrate_task_to(p, env.best_cpu);
+               if (ret != 0)
+                       trace_sched_stick_numa(p, env.src_cpu, env.best_cpu);
                 return ret;
         }
  
         ret = migrate_swap(p, env.best_task);
+       if (ret != 0)
+               trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));
         put_task_struct(env.best_task);
         return ret;
  }
@@ -1278,7 +1274,7 @@ static void numa_migrate_preferred(struct task_struct *p)
         p->numa_migrate_retry = jiffies + HZ;
  
         /* Success if task is already running on preferred CPU */
-       if (cpu_to_node(task_cpu(p)) == p->numa_preferred_nid)
+       if (task_node(p) == p->numa_preferred_nid)
                 return;
  
         /* Otherwise, try migrate to a CPU on the preferred node */
@@ -1350,7 +1346,6 @@ static void update_task_scan_period(struct task_struct *p,
                  * scanning faster if shared accesses dominate as it may
                  * simply bounce migrations uselessly
                  */
-               period_slot = DIV_ROUND_UP(diff, NUMA_PERIOD_SLOTS);
                 ratio = DIV_ROUND_UP(private * NUMA_PERIOD_SLOTS, (private + shared));
                 diff = (diff * ratio) / NUMA_PERIOD_SLOTS;
         }
@@ -3923,7 +3918,7 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
  {
         struct sched_entity *se = tg->se[cpu];
  
-       if (!tg->parent || !wl) /* the trivial, non-cgroup case */
+       if (!tg->parent)        /* the trivial, non-cgroup case */
                 return wl;
  
         for_each_sched_entity(se) {
@@ -4101,12 +4096,16 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
   */
  static struct sched_group *
  find_idlest_group(struct sched_domain *sd, struct task_struct *p,
-                 int this_cpu, int load_idx)
+                 int this_cpu, int sd_flag)
  {
         struct sched_group *idlest = NULL, *group = sd->groups;
         unsigned long min_load = ULONG_MAX, this_load = 0;
+       int load_idx = sd->forkexec_idx;
         int imbalance = 100 + (sd->imbalance_pct-100)/2;
  
+       if (sd_flag & SD_BALANCE_WAKE)
+               load_idx = sd->wake_idx;
+
         do {
                 unsigned long load, avg_load;
                 int local_group;
@@ -4274,7 +4273,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
         }
  
         while (sd) {
-               int load_idx = sd->forkexec_idx;
                 struct sched_group *group;
                 int weight;
  
@@ -4283,10 +4281,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
                         continue;
                 }
  
-               if (sd_flag & SD_BALANCE_WAKE)
-                       load_idx = sd->wake_idx;
-
-               group = find_idlest_group(sd, p, cpu, load_idx);
+               group = find_idlest_group(sd, p, cpu, sd_flag);
                 if (!group) {
                         sd = sd->child;
                         continue;
@@ -5512,7 +5507,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                         struct sched_group *group, int load_idx,
                         int local_group, struct sg_lb_stats *sgs)
  {
-       unsigned long nr_running;
         unsigned long load;
         int i;
  
@@ -5521,8 +5515,6 @@ static inline void update_sg_lb_stats(struct lb_env *env,
         for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
                 struct rq *rq = cpu_rq(i);
  
-               nr_running = rq->nr_running;
-
                 /* Bias balancing toward cpus of our domain */
                 if (local_group)
                         load = target_load(i, load_idx);
@@ -5530,7 +5522,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                         load = source_load(i, load_idx);
  
                 sgs->group_load += load;
-               sgs->sum_nr_running += nr_running;
+               sgs->sum_nr_running += rq->nr_running;
  #ifdef CONFIG_NUMA_BALANCING
                 sgs->nr_numa_running += rq->nr_numa_running;
                 sgs->nr_preferred_running += rq->nr_preferred_running;
@@ -6521,7 +6513,7 @@ static struct {
         unsigned long next_balance;     /* in jiffy units */
  } nohz ____cacheline_aligned;
  
-static inline int find_new_ilb(int call_cpu)
+static inline int find_new_ilb(void)
  {
         int ilb = cpumask_first(nohz.idle_cpus_mask);
  
@@ -6536,13 +6528,13 @@ static inline int find_new_ilb(int call_cpu)
   * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
   * CPU (if there is one).
   */
-static void nohz_balancer_kick(int cpu)
+static void nohz_balancer_kick(void)
  {
         int ilb_cpu;
  
         nohz.next_balance++;
  
-       ilb_cpu = find_new_ilb(cpu);
+       ilb_cpu = find_new_ilb();
  
         if (ilb_cpu >= nr_cpu_ids)
                 return;
@@ -6652,10 +6644,10 @@ void update_max_interval(void)
   *
   * Balancing parameters are set up in init_sched_domains.
   */
-static void rebalance_domains(int cpu, enum cpu_idle_type idle)
+static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
  {
         int continue_balancing = 1;
-       struct rq *rq = cpu_rq(cpu);
+       int cpu = rq->cpu;
         unsigned long interval;
         struct sched_domain *sd;
         /* Earliest time when we have to do rebalance again */
@@ -6752,9 +6744,9 @@ out:
   * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
   * rebalancing for all the cpus for whom scheduler ticks are stopped.
   */
-static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
+static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
  {
-       struct rq *this_rq = cpu_rq(this_cpu);
+       int this_cpu = this_rq->cpu;
         struct rq *rq;
         int balance_cpu;
  
@@ -6781,7 +6773,7 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
                 update_idle_cpu_load(rq);
                 raw_spin_unlock_irq(&rq->lock);
  
-               rebalance_domains(balance_cpu, CPU_IDLE);
+               rebalance_domains(rq, CPU_IDLE);
  
                 if (time_after(this_rq->next_balance, rq->next_balance))
                         this_rq->next_balance = rq->next_balance;
@@ -6800,14 +6792,14 @@ end:
   *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
   *     domain span are idle.
   */
-static inline int nohz_kick_needed(struct rq *rq, int cpu)
+static inline int nohz_kick_needed(struct rq *rq)
  {
         unsigned long now = jiffies;
         struct sched_domain *sd;
         struct sched_group_power *sgp;
-       int nr_busy;
+       int nr_busy, cpu = rq->cpu;
  
-       if (unlikely(idle_cpu(cpu)))
+       if (unlikely(rq->idle_balance))
                 return 0;
  
         /*
@@ -6856,7 +6848,7 @@ need_kick:
         return 1;
  }
  #else
-static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
+static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
  #endif
  
  /*
@@ -6865,38 +6857,39 @@ static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
   */
  static void run_rebalance_domains(struct softirq_action *h)
  {
-       int this_cpu = smp_processor_id();
-       struct rq *this_rq = cpu_rq(this_cpu);
+       struct rq *this_rq = this_rq();
         enum cpu_idle_type idle = this_rq->idle_balance ?
                                                 CPU_IDLE : CPU_NOT_IDLE;
  
-       rebalance_domains(this_cpu, idle);
+       rebalance_domains(this_rq, idle);
  
         /*
          * If this cpu has a pending nohz_balance_kick, then do the
          * balancing on behalf of the other idle cpus whose ticks are
          * stopped.
          */
-       nohz_idle_balance(this_cpu, idle);
+       nohz_idle_balance(this_rq, idle);
  }
  
-static inline int on_null_domain(int cpu)
+static inline int on_null_domain(struct rq *rq)
  {
-       return !rcu_dereference_sched(cpu_rq(cpu)->sd);
+       return !rcu_dereference_sched(rq->sd);
  }
  
  /*
   * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
   */
-void trigger_load_balance(struct rq *rq, int cpu)
+void trigger_load_balance(struct rq *rq)
  {
         /* Don't need to rebalance while attached to NULL domain */
-       if (time_after_eq(jiffies, rq->next_balance) &&
-           likely(!on_null_domain(cpu)))
+       if (unlikely(on_null_domain(rq)))
+               return;
+
+       if (time_after_eq(jiffies, rq->next_balance))
                 raise_softirq(SCHED_SOFTIRQ);
  #ifdef CONFIG_NO_HZ_COMMON
-       if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
-               nohz_balancer_kick(cpu);
+       if (nohz_kick_needed(rq))
+               nohz_balancer_kick();
  #endif
  }