Merge tag 'v3.11-rc5' into perf/core
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9565645..10d729b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3017,6 +3017,23 @@ static unsigned long cpu_avg_load_per_task(int cpu)
        return 0;
 }
 
+static void record_wakee(struct task_struct *p)
+{
+       /*
+        * Rough decay (wiping) of the flip count, to keep the cost low.
+        * Don't worry about the exact boundary; a genuinely active task
+        * won't notice the occasional loss.
+        */
+       if (jiffies > current->wakee_flip_decay_ts + HZ) {
+               current->wakee_flips = 0;
+               current->wakee_flip_decay_ts = jiffies;
+       }
+
+       if (current->last_wakee != p) {
+               current->last_wakee = p;
+               current->wakee_flips++;
+       }
+}
 
 static void task_waking_fair(struct task_struct *p)
 {
@@ -3037,6 +3054,7 @@ static void task_waking_fair(struct task_struct *p)
 #endif
 
        se->vruntime -= min_vruntime;
+       record_wakee(p);
 }
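
For illustration, here is a minimal userspace sketch of the bookkeeping that record_wakee() above performs on the waker (plain C, with time() in seconds standing in for jiffies/HZ and a hypothetical struct fake_task in place of task_struct; a sketch of the idea, not the kernel code):

#include <stdio.h>
#include <time.h>

/* Hypothetical userspace stand-ins for the kernel's task fields. */
struct fake_task {
	const char *name;
	struct fake_task *last_wakee;
	unsigned int wakee_flips;
	time_t wakee_flip_decay_ts;
};

/* Mirrors record_wakee(), with seconds instead of jiffies/HZ. */
static void record_wakee_sketch(struct fake_task *curr, struct fake_task *p)
{
	time_t now = time(NULL);

	/* Rough once-per-second decay: cheap, imprecise at the boundary. */
	if (now > curr->wakee_flip_decay_ts + 1) {
		curr->wakee_flips = 0;
		curr->wakee_flip_decay_ts = now;
	}

	/* Count a "flip" only when the wakee changes. */
	if (curr->last_wakee != p) {
		curr->last_wakee = p;
		curr->wakee_flips++;
	}
}

int main(void)
{
	struct fake_task master = { "master", NULL, 0, 0 };
	struct fake_task w1 = { "w1" }, w2 = { "w2" };

	/* Waking w1, w2, w1 in a burst counts three flips... */
	record_wakee_sketch(&master, &w1);
	record_wakee_sketch(&master, &w2);
	record_wakee_sketch(&master, &w1);
	/* ...but repeatedly waking the same task does not. */
	record_wakee_sketch(&master, &w1);

	printf("master wakee_flips = %u\n", master.wakee_flips); /* 3 */
	return 0;
}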
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -3155,6 +3173,28 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 #endif
 
+static int wake_wide(struct task_struct *p)
+{
+       int factor = this_cpu_read(sd_llc_size);
+
+       /*
+        * wakee_flips is a measure of switching frequency: a high value
+        * means either many different wakees or rapid switching between
+        * them.  Using the LLC size as the factor scales the threshold
+        * automatically, so a bigger node allows more pulling.
+        */
+       if (p->wakee_flips > factor) {
+               /*
+                * The wakee is somewhat hot and needs a certain amount of
+                * CPU; if the waker is far hotter still, prefer to leave
+                * the wakee where it is.
+                */
+               if (current->wakee_flips > (factor * p->wakee_flips))
+                       return 1;
+       }
+
+       return 0;
+}
+
 static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
        s64 this_load, load;
@@ -3164,6 +3204,13 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
        unsigned long weight;
        int balanced;
 
+       /*
+        * If we wake multiple tasks, be careful not to bounce
+        * ourselves around too much.
+        */
+       if (wake_wide(p))
+               return 0;
+
        idx       = sd->wake_idx;
        this_cpu  = smp_processor_id();
        prev_cpu  = task_cpu(p);
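
Taken together, wake_wide() and the early return added to wake_affine() above act as a 1:N wakeup filter: when the waker flips between many wakees far faster than the wakee does, the affine pull is refused before any load comparison. A minimal standalone sketch of the same decision (plain C, with the factor passed explicitly instead of read from sd_llc_size; hypothetical names, not the kernel code), with two example outcomes:

#include <stdio.h>

/*
 * Same decision as wake_wide(): report 1 (keep the pair spread out) only
 * when the wakee itself flips wakees more often than the LLC has CPUs
 * *and* the waker flips at least 'llc_size' times more often than that.
 */
static int wake_wide_sketch(unsigned int waker_flips,
			    unsigned int wakee_flips,
			    unsigned int llc_size)
{
	if (wakee_flips > llc_size &&
	    waker_flips > llc_size * wakee_flips)
		return 1;
	return 0;
}

int main(void)
{
	/* Assume an LLC spanning 4 CPUs. */
	unsigned int llc = 4;

	/* 1:N master/worker pattern: the master flips far more often. */
	printf("%d\n", wake_wide_sketch(60, 5, llc));	/* 1: refuse the pull */
	/* 1:1 producer/consumer pattern: flip counts are comparable. */
	printf("%d\n", wake_wide_sketch(12, 10, llc));	/* 0: affine wakeup OK */
	return 0;
}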
@@ -4171,47 +4218,48 @@ static void update_blocked_averages(int cpu)
 }
 
 /*
- * Compute the cpu's hierarchical load factor for each task group.
+ * Compute the hierarchical load factor for cfs_rq and all its ancestors.
  * This needs to be done in a top-down fashion because the load of a child
  * group is a fraction of its parent's load.
  */
-static int tg_load_down(struct task_group *tg, void *data)
-{
-       unsigned long load;
-       long cpu = (long)data;
-
-       if (!tg->parent) {
-               load = cpu_rq(cpu)->avg.load_avg_contrib;
-       } else {
-               load = tg->parent->cfs_rq[cpu]->h_load;
-               load = div64_ul(load * tg->se[cpu]->avg.load_avg_contrib,
-                               tg->parent->cfs_rq[cpu]->runnable_load_avg + 1);
-       }
-
-       tg->cfs_rq[cpu]->h_load = load;
-
-       return 0;
-}
-
-static void update_h_load(long cpu)
+static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
 {
-       struct rq *rq = cpu_rq(cpu);
+       struct rq *rq = rq_of(cfs_rq);
+       struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];
        unsigned long now = jiffies;
+       unsigned long load;
 
-       if (rq->h_load_throttle == now)
+       if (cfs_rq->last_h_load_update == now)
                return;
 
-       rq->h_load_throttle = now;
+       cfs_rq->h_load_next = NULL;
+       for_each_sched_entity(se) {
+               cfs_rq = cfs_rq_of(se);
+               cfs_rq->h_load_next = se;
+               if (cfs_rq->last_h_load_update == now)
+                       break;
+       }
 
-       rcu_read_lock();
-       walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
-       rcu_read_unlock();
+       if (!se) {
+               cfs_rq->h_load = rq->avg.load_avg_contrib;
+               cfs_rq->last_h_load_update = now;
+       }
+
+       while ((se = cfs_rq->h_load_next) != NULL) {
+               load = cfs_rq->h_load;
+               load = div64_ul(load * se->avg.load_avg_contrib,
+                               cfs_rq->runnable_load_avg + 1);
+               cfs_rq = group_cfs_rq(se);
+               cfs_rq->h_load = load;
+               cfs_rq->last_h_load_update = now;
+       }
 }
 
 static unsigned long task_h_load(struct task_struct *p)
 {
        struct cfs_rq *cfs_rq = task_cfs_rq(p);
 
+       update_cfs_rq_h_load(cfs_rq);
        return div64_ul(p->se.avg.load_avg_contrib * cfs_rq->h_load,
                        cfs_rq->runnable_load_avg + 1);
 }
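
The rewritten path above computes h_load lazily: update_cfs_rq_h_load() walks from the task's cfs_rq up towards the root, then cascades each group's share of its parent's load back down via h_load_next, and task_h_load() scales the task's own contribution by its cfs_rq's share. A small numeric sketch of that cascade (plain C, flat arrays standing in for the group hierarchy; the field names only mirror the kernel's, this is not the kernel code):

#include <stdio.h>

/*
 * One level per group on the path root -> ... -> task's cfs_rq.
 * se_contrib[i] is the group entity's load_avg_contrib at level i,
 * runnable[i] is the parent cfs_rq's runnable_load_avg at that level.
 */
static unsigned long h_load_sketch(unsigned long rq_contrib,
				   const unsigned long *se_contrib,
				   const unsigned long *runnable,
				   int levels)
{
	unsigned long load = rq_contrib;	/* root: rq->avg.load_avg_contrib */
	int i;

	/* Same cascade as the h_load_next walk in update_cfs_rq_h_load(). */
	for (i = 0; i < levels; i++)
		load = load * se_contrib[i] / (runnable[i] + 1);

	return load;
}

int main(void)
{
	/* Root rq carries 2048; one nested group owns half of its parent. */
	unsigned long se_contrib[] = { 1024 };
	unsigned long runnable[]   = { 2047 };	/* parent runnable_load_avg */
	unsigned long h_load = h_load_sketch(2048, se_contrib, runnable, 1);

	/* task_h_load(): scale the task's own contrib by its cfs_rq share. */
	unsigned long task_contrib = 512, cfs_runnable = 1023;

	printf("group h_load = %lu, task h_load = %lu\n",
	       h_load, task_contrib * h_load / (cfs_runnable + 1));
	return 0;
}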
@@ -4220,10 +4268,6 @@ static inline void update_blocked_averages(int cpu)
 {
 }
 
-static inline void update_h_load(long cpu)
-{
-}
-
 static unsigned long task_h_load(struct task_struct *p)
 {
        return p->se.avg.load_avg_contrib;
@@ -5108,7 +5152,6 @@ redo:
                env.src_rq    = busiest;
                env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
-               update_h_load(env.src_cpu);
 more_balance:
                local_irq_save(flags);
                double_rq_lock(env.dst_rq, busiest);