sched: Move up affinity check to mitigate useless redoing overhead

[platform/adaptation/renesas_rcar/renesas_kernel.git] / kernel / sched / fair.c
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 7a33e59..b8ef321 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -431,13 +431,13 @@ void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec);
   * Scheduling class tree data structure manipulation methods:
   */
  
-static inline u64 max_vruntime(u64 min_vruntime, u64 vruntime)
+static inline u64 max_vruntime(u64 max_vruntime, u64 vruntime)
  {
-       s64 delta = (s64)(vruntime - min_vruntime);
+       s64 delta = (s64)(vruntime - max_vruntime);
         if (delta > 0)
-               min_vruntime = vruntime;
+               max_vruntime = vruntime;
  
-       return min_vruntime;
+       return max_vruntime;
  }
  
  static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
@@ -473,6 +473,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
                         vruntime = min_vruntime(vruntime, se->vruntime);
         }
  
+       /* ensure we never gain time by being placed backwards. */
         cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
  #ifndef CONFIG_64BIT
         smp_wmb();
@@ -652,7 +653,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
  }
  
  /*
- * We calculate the vruntime slice of a to be inserted task
+ * We calculate the vruntime slice of a to-be-inserted task.
   *
   * vs = s/w
   */
@@ -1562,6 +1563,27 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
                 se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
         } /* migrations, e.g. sleep=0 leave decay_count == 0 */
  }
+
+/*
+ * Update the rq's load with the elapsed running time before entering
+ * idle. If the last scheduled task is not a CFS task, idle_enter will
+ * be the only way to update the runnable statistic.
+ */
+void idle_enter_fair(struct rq *this_rq)
+{
+       update_rq_runnable_avg(this_rq, 1);
+}
+
+/*
+ * Update the rq's load with the elapsed idle time before a task is
+ * scheduled. If the newly scheduled task is not a CFS task, idle_exit will
+ * be the only way to update the runnable statistic.
+ */
+void idle_exit_fair(struct rq *this_rq)
+{
+       update_rq_runnable_avg(this_rq, 0);
+}
+
  #else
  static inline void update_entity_load_avg(struct sched_entity *se,
                                           int update_cfs_rq) {}
@@ -3874,10 +3896,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
         int tsk_cache_hot = 0;
         /*
          * We do not migrate tasks that are:
-        * 1) running (obviously), or
+        * 1) throttled_lb_pair, or
          * 2) cannot be migrated to this CPU due to cpus_allowed, or
-        * 3) are cache-hot on their current CPU.
+        * 3) running (obviously), or
+        * 4) are cache-hot on their current CPU.
          */
+       if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
+               return 0;
+
         if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
                 int new_dst_cpu;
  
@@ -3920,20 +3946,17 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
         tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd);
         if (!tsk_cache_hot ||
                 env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
-#ifdef CONFIG_SCHEDSTATS
+
                 if (tsk_cache_hot) {
                         schedstat_inc(env->sd, lb_hot_gained[env->idle]);
                         schedstat_inc(p, se.statistics.nr_forced_migrations);
                 }
-#endif
+
                 return 1;
         }
  
-       if (tsk_cache_hot) {
-               schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
-               return 0;
-       }
-       return 1;
+       schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
+       return 0;
  }
  
  /*
@@ -3948,9 +3971,6 @@ static int move_one_task(struct lb_env *env)
         struct task_struct *p, *n;
  
         list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
-               if (throttled_lb_pair(task_group(p), env->src_rq->cpu, env->dst_cpu))
-                       continue;
-
                 if (!can_migrate_task(p, env))
                         continue;
  
@@ -4002,7 +4022,7 @@ static int move_tasks(struct lb_env *env)
                         break;
                 }
  
-               if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
+               if (!can_migrate_task(p, env))
                         goto next;
  
                 load = task_h_load(p);
@@ -4013,9 +4033,6 @@ static int move_tasks(struct lb_env *env)
                 if ((load / 2) > env->imbalance)
                         goto next;
  
-               if (!can_migrate_task(p, env))
-                       goto next;
-
                 move_task(p, env);
                 pulled++;
                 env->imbalance -= load;
@@ -4245,7 +4262,7 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
         return load_idx;
  }
  
-unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
+static unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
  {
         return SCHED_POWER_SCALE;
  }
@@ -4255,7 +4272,7 @@ unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
         return default_scale_freq_power(sd, cpu);
  }
  
-unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
+static unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
  {
         unsigned long weight = sd->span_weight;
         unsigned long smt_gain = sd->smt_gain;
@@ -4270,7 +4287,7 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
         return default_scale_smt_power(sd, cpu);
  }
  
-unsigned long scale_rt_power(int cpu)
+static unsigned long scale_rt_power(int cpu)
  {
         struct rq *rq = cpu_rq(cpu);
         u64 total, available, age_stamp, avg;
@@ -5007,8 +5024,21 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                 .cpus           = cpus,
         };
  
+       /*
+        * For NEWLY_IDLE load_balancing, we don't need to consider
+        * other cpus in our group
+        */
+       if (idle == CPU_NEWLY_IDLE) {
+               env.dst_grpmask = NULL;
+               /*
+                * We don't care about max_lb_iterations in this case;
+                * a following patch will remove it.
+                */
+               max_lb_iterations = 0;
+       } else
+               max_lb_iterations = cpumask_weight(env.dst_grpmask);
+
         cpumask_copy(cpus, cpu_active_mask);
-       max_lb_iterations = cpumask_weight(env.dst_grpmask);
  
         schedstat_inc(sd, lb_count[idle]);
  
@@ -5061,17 +5091,17 @@ more_balance:
                 double_rq_unlock(env.dst_rq, busiest);
                 local_irq_restore(flags);
  
-               if (env.flags & LBF_NEED_BREAK) {
-                       env.flags &= ~LBF_NEED_BREAK;
-                       goto more_balance;
-               }
-
                 /*
                  * some other cpu did the load balance for us.
                  */
                 if (cur_ld_moved && env.dst_cpu != smp_processor_id())
                         resched_cpu(env.dst_cpu);
  
+               if (env.flags & LBF_NEED_BREAK) {
+                       env.flags &= ~LBF_NEED_BREAK;
+                       goto more_balance;
+               }
+
                 /*
                  * Revisit (affine) tasks on src_cpu that couldn't be moved to
                  * us and move them to an alternate dst_cpu in our sched_group
@@ -5219,8 +5249,6 @@ void idle_balance(int this_cpu, struct rq *this_rq)
         if (this_rq->avg_idle < sysctl_sched_migration_cost)
                 return;
  
-       update_rq_runnable_avg(this_rq, 1);
-
         /*
          * Drop the rq->lock, but keep IRQ/preempt disabled.
          */
@@ -5468,7 +5496,7 @@ void update_max_interval(void)
   * It checks each scheduling domain to see if it is due to be balanced,
   * and initiates a balancing operation if so.
   *
- * Balancing parameters are set up in arch_init_sched_domains.
+ * Balancing parameters are set up in init_sched_domains.
   */
  static void rebalance_domains(int cpu, enum cpu_idle_type idle)
  {
@@ -5506,10 +5534,11 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
                 if (time_after_eq(jiffies, sd->last_balance + interval)) {
                         if (load_balance(cpu, rq, sd, idle, &balance)) {
                                 /*
-                                * We've pulled tasks over so either we're no
-                                * longer idle.
+                                * The LBF_SOME_PINNED logic could have changed
+                                * env->dst_cpu, so we can't know our idle
+                                * state even if we migrated tasks. Update it.
                                  */
-                               idle = CPU_NOT_IDLE;
+                               idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
                         }
                         sd->last_balance = jiffies;
                 }