Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial
[platform/kernel/linux-exynos.git] / kernel / sched / fair.c
index 3ae75f5..d711093 100644
@@ -717,18 +717,12 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 #ifdef CONFIG_SMP
+
+#include "sched-pelt.h"
+
 static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
 static unsigned long task_h_load(struct task_struct *p);
 
-/*
- * We choose a half-life close to 1 scheduling period.
- * Note: The tables runnable_avg_yN_inv and runnable_avg_yN_sum are
- * dependent on this value.
- */
-#define LOAD_AVG_PERIOD 32
-#define LOAD_AVG_MAX 47742 /* maximum possible load avg */
-#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_AVG_MAX */
-
 /* Give a new sched_entity initial runnable values so its load is weighted heavily during its infancy */
 void init_entity_runnable_average(struct sched_entity *se)
 {
@@ -2733,47 +2727,15 @@ static inline void update_cfs_shares(struct sched_entity *se)
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_SMP
-/* Precomputed fixed inverse multiplies for multiplication by y^n */
-static const u32 runnable_avg_yN_inv[] = {
-       0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
-       0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
-       0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
-       0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9,
-       0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80,
-       0x85aac367, 0x82cd8698,
-};
-
-/*
- * Precomputed \Sum y^k { 1<=k<=n }.  These are floor(true_value) to prevent
- * over-estimates when re-combining.
- */
-static const u32 runnable_avg_yN_sum[] = {
-           0, 1002, 1982, 2941, 3880, 4798, 5697, 6576, 7437, 8279, 9103,
-        9909,10698,11470,12226,12966,13690,14398,15091,15769,16433,17082,
-       17718,18340,18949,19545,20128,20698,21256,21802,22336,22859,23371,
-};
-
-/*
- * Precomputed \Sum y^k { 1<=k<=n, where n%32=0). Values are rolled down to
- * lower integers. See Documentation/scheduler/sched-avg.txt how these
- * were generated:
- */
-static const u32 __accumulated_sum_N32[] = {
-           0, 23371, 35056, 40899, 43820, 45281,
-       46011, 46376, 46559, 46650, 46696, 46719,
-};
-
 /*
  * Approximate:
  *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
  */
-static __always_inline u64 decay_load(u64 val, u64 n)
+static u64 decay_load(u64 val, u64 n)
 {
        unsigned int local_n;
 
-       if (!n)
-               return val;
-       else if (unlikely(n > LOAD_AVG_PERIOD * 63))
+       if (unlikely(n > LOAD_AVG_PERIOD * 63))
                return 0;
 
        /* after bounds checking we can collapse to 32-bit */
@@ -2795,30 +2757,97 @@ static __always_inline u64 decay_load(u64 val, u64 n)
        return val;
 }
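
For reference, a minimal user-space sketch of the decay trick used above, reusing the runnable_avg_yN_inv[] values that this patch moves out of fair.c and into the generated sched-pelt.h (illustrative only; the kernel uses mul_u64_u32_shr() for the fixed-point step, which also avoids the 64x32-bit overflow the plain multiply below would have for very large values):

#include <stdio.h>
#include <stdint.h>

#define LOAD_AVG_PERIOD	32

/* 32-bit fixed-point factors for y^0..y^31, y = 0.5^(1/32), from the removed table */
static const uint32_t runnable_avg_yN_inv[] = {
	0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
	0xe0ccdeeb, 0xdbfbb796, 0xd744fcc9, 0xd2a81d91, 0xce248c14, 0xc9b9bd85,
	0xc5672a10, 0xc12c4cc9, 0xbd08a39e, 0xb8fbaf46, 0xb504f333, 0xb123f581,
	0xad583ee9, 0xa9a15ab4, 0xa5fed6a9, 0xa2704302, 0x9ef5325f, 0x9b8d39b9,
	0x9837f050, 0x94f4efa8, 0x91c3d373, 0x8ea4398a, 0x8b95c1e3, 0x88980e80,
	0x85aac367, 0x82cd8698,
};

/* val * y^n: whole periods are plain halvings, the remainder is one multiply */
static uint64_t decay(uint64_t val, unsigned int n)
{
	if (n > LOAD_AVG_PERIOD * 63)
		return 0;
	val >>= n / LOAD_AVG_PERIOD;	/* y^LOAD_AVG_PERIOD == 1/2 */
	n %= LOAD_AVG_PERIOD;
	return (val * runnable_avg_yN_inv[n]) >> 32;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)decay(47742, 32));  /* ~23870: one half-life */
	printf("%llu\n", (unsigned long long)decay(47742, 345)); /* ~26: nearly decayed away */
	return 0;
}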
 
+static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
+{
+       u32 c1, c2, c3 = d3; /* y^0 == 1 */
+
+       /*
+        * c1 = d1 y^p
+        */
+       c1 = decay_load((u64)d1, periods);
+
+       /*
+        *            p-1
+        * c2 = 1024 \Sum y^n
+        *            n=1
+        *
+        *              inf        inf
+        *    = 1024 ( \Sum y^n - \Sum y^n - y^0 )
+        *              n=0        n=p
+        */
+       c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
+
+       return c1 + c2 + c3;
+}
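
The c2 step above leans on the closed form of the geometric series: since LOAD_AVG_MAX stands in for 1024*\Sum_{n>=0} y^n, the contribution of the p-1 intermediate full periods comes out as LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, p) - 1024. A quick floating-point check of that identity (illustrative only; the kernel works in truncated integer arithmetic, which is roughly why LOAD_AVG_MAX is 47742 rather than the exact 1024/(1-y) ~= 47788):

#include <stdio.h>
#include <math.h>

int main(void)
{
	const double y = pow(0.5, 1.0 / 32.0);	/* y^32 == 0.5 */
	const double max = 1024.0 / (1.0 - y);	/* 1024 * \Sum_{n>=0} y^n */
	const unsigned int p = 5;
	double direct = 0.0, closed;
	unsigned int n;

	for (n = 1; n <= p - 1; n++)		/* c2 summed term by term */
		direct += 1024.0 * pow(y, n);

	/* c2 as computed above: whole series, minus the decayed tail, minus y^0 */
	closed = max - max * pow(y, p) - 1024.0;

	printf("direct=%.2f closed=%.2f\n", direct, closed);	/* both ~3881 */
	return 0;
}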
+
+#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
+
 /*
- * For updates fully spanning n periods, the contribution to runnable
- * average will be: \Sum 1024*y^n
+ * Accumulate the three separate parts of the sum; d1 the remainder
+ * of the last (incomplete) period, d2 the span of full periods and d3
+ * the remainder of the (incomplete) current period.
+ *
+ *           d1          d2           d3
+ *           ^           ^            ^
+ *           |           |            |
+ *         |<->|<----------------->|<--->|
+ * ... |---x---|------| ... |------|-----x (now)
+ *
+ *                           p-1
+ * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
+ *                           n=1
  *
- * We can compute this reasonably efficiently by combining:
- *   y^PERIOD = 1/2 with precomputed \Sum 1024*y^n {for  n <PERIOD}
+ *    = u y^p +                                        (Step 1)
+ *
+ *                     p-1
+ *      d1 y^p + 1024 \Sum y^n + d3 y^0                (Step 2)
+ *                     n=1
  */
-static u32 __compute_runnable_contrib(u64 n)
+static __always_inline u32
+accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
+              unsigned long weight, int running, struct cfs_rq *cfs_rq)
 {
-       u32 contrib = 0;
+       unsigned long scale_freq, scale_cpu;
+       u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
+       u64 periods;
 
-       if (likely(n <= LOAD_AVG_PERIOD))
-               return runnable_avg_yN_sum[n];
-       else if (unlikely(n >= LOAD_AVG_MAX_N))
-               return LOAD_AVG_MAX;
+       scale_freq = arch_scale_freq_capacity(NULL, cpu);
+       scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
 
-       /* Since n < LOAD_AVG_MAX_N, n/LOAD_AVG_PERIOD < 11 */
-       contrib = __accumulated_sum_N32[n/LOAD_AVG_PERIOD];
-       n %= LOAD_AVG_PERIOD;
-       contrib = decay_load(contrib, n);
-       return contrib + runnable_avg_yN_sum[n];
-}
+       delta += sa->period_contrib;
+       periods = delta / 1024; /* A period is 1024us (~1ms) */
 
-#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
+       /*
+        * Step 1: decay old *_sum if we crossed period boundaries.
+        */
+       if (periods) {
+               sa->load_sum = decay_load(sa->load_sum, periods);
+               if (cfs_rq) {
+                       cfs_rq->runnable_load_sum =
+                               decay_load(cfs_rq->runnable_load_sum, periods);
+               }
+               sa->util_sum = decay_load((u64)(sa->util_sum), periods);
+
+               /*
+                * Step 2
+                */
+               delta %= 1024;
+               contrib = __accumulate_pelt_segments(periods,
+                               1024 - sa->period_contrib, delta);
+       }
+       sa->period_contrib = delta;
+
+       contrib = cap_scale(contrib, scale_freq);
+       if (weight) {
+               sa->load_sum += weight * contrib;
+               if (cfs_rq)
+                       cfs_rq->runnable_load_sum += weight * contrib;
+       }
+       if (running)
+               sa->util_sum += contrib * scale_cpu;
+
+       return periods;
+}
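
A worked example with made-up numbers may help here: suppose sa->period_contrib is 300 (300us already accrued against the current period) and a new delta of 2248us arrives. Then delta becomes 2548, periods = 2 and the new remainder is d3 = 2548 % 1024 = 500, while d1 = 1024 - 300 = 724 closes out the old period and exactly one full period sits in between (724 + 1024 + 500 = 2248). __accumulate_pelt_segments() then returns roughly

	c1 = decay_load(724, 2)                                ~=  693
	c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, 2) - 1024 ~= 1000
	c3 = d3                                                 =  500
	contrib                                                ~= 2193

before cap_scale() applies the frequency factor (with SCHED_CAPACITY_SHIFT == 10, a scale_freq of 512 would halve the contribution). The non-zero periods return value then tells ___update_load_avg() below to also refresh the *_avg members; because a permanently runnable entity's load_sum converges to roughly weight * LOAD_AVG_MAX (and util_sum, at full capacity and frequency, to roughly 1024 * LOAD_AVG_MAX), dividing by LOAD_AVG_MAX in that second step makes load_avg converge towards the entity's weight and util_avg towards ~1024.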
 
 /*
  * We can represent the historical contribution to runnable average as the
@@ -2849,13 +2878,10 @@ static u32 __compute_runnable_contrib(u64 n)
  *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
  */
 static __always_inline int
-__update_load_avg(u64 now, int cpu, struct sched_avg *sa,
+___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
                  unsigned long weight, int running, struct cfs_rq *cfs_rq)
 {
-       u64 delta, scaled_delta, periods;
-       u32 contrib;
-       unsigned int delta_w, scaled_delta_w, decayed = 0;
-       unsigned long scale_freq, scale_cpu;
+       u64 delta;
 
        delta = now - sa->last_update_time;
        /*
@@ -2874,83 +2900,52 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
        delta >>= 10;
        if (!delta)
                return 0;
-       sa->last_update_time = now;
-
-       scale_freq = arch_scale_freq_capacity(NULL, cpu);
-       scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
-
-       /* delta_w is the amount already accumulated against our next period */
-       delta_w = sa->period_contrib;
-       if (delta + delta_w >= 1024) {
-               decayed = 1;
 
-               /* how much left for next period will start over, we don't know yet */
-               sa->period_contrib = 0;
+       sa->last_update_time += delta << 10;
 
-               /*
-                * Now that we know we're crossing a period boundary, figure
-                * out how much from delta we need to complete the current
-                * period and accrue it.
-                */
-               delta_w = 1024 - delta_w;
-               scaled_delta_w = cap_scale(delta_w, scale_freq);
-               if (weight) {
-                       sa->load_sum += weight * scaled_delta_w;
-                       if (cfs_rq) {
-                               cfs_rq->runnable_load_sum +=
-                                               weight * scaled_delta_w;
-                       }
-               }
-               if (running)
-                       sa->util_sum += scaled_delta_w * scale_cpu;
-
-               delta -= delta_w;
-
-               /* Figure out how many additional periods this update spans */
-               periods = delta / 1024;
-               delta %= 1024;
+       /*
+        * Now we know we crossed measurement unit boundaries. The *_avg
+        * accrues in two steps:
+        *
+        * Step 1: accumulate *_sum since last_update_time. If we haven't
+        * crossed period boundaries, finish.
+        */
+       if (!accumulate_sum(delta, cpu, sa, weight, running, cfs_rq))
+               return 0;
 
-               sa->load_sum = decay_load(sa->load_sum, periods + 1);
-               if (cfs_rq) {
-                       cfs_rq->runnable_load_sum =
-                               decay_load(cfs_rq->runnable_load_sum, periods + 1);
-               }
-               sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1);
-
-               /* Efficiently calculate \sum (1..n_period) 1024*y^i */
-               contrib = __compute_runnable_contrib(periods);
-               contrib = cap_scale(contrib, scale_freq);
-               if (weight) {
-                       sa->load_sum += weight * contrib;
-                       if (cfs_rq)
-                               cfs_rq->runnable_load_sum += weight * contrib;
-               }
-               if (running)
-                       sa->util_sum += contrib * scale_cpu;
+       /*
+        * Step 2: update *_avg.
+        */
+       sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
+       if (cfs_rq) {
+               cfs_rq->runnable_load_avg =
+                       div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
        }
+       sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
 
-       /* Remainder of delta accrued against u_0` */
-       scaled_delta = cap_scale(delta, scale_freq);
-       if (weight) {
-               sa->load_sum += weight * scaled_delta;
-               if (cfs_rq)
-                       cfs_rq->runnable_load_sum += weight * scaled_delta;
-       }
-       if (running)
-               sa->util_sum += scaled_delta * scale_cpu;
+       return 1;
+}
 
-       sa->period_contrib += delta;
+static int
+__update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
+{
+       return ___update_load_avg(now, cpu, &se->avg, 0, 0, NULL);
+}
 
-       if (decayed) {
-               sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
-               if (cfs_rq) {
-                       cfs_rq->runnable_load_avg =
-                               div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
-               }
-               sa->util_avg = sa->util_sum / LOAD_AVG_MAX;
-       }
+static int
+__update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       return ___update_load_avg(now, cpu, &se->avg,
+                                 se->on_rq * scale_load_down(se->load.weight),
+                                 cfs_rq->curr == se, NULL);
+}
 
-       return decayed;
+static int
+__update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
+{
+       return ___update_load_avg(now, cpu, &cfs_rq->avg,
+                       scale_load_down(cfs_rq->load.weight),
+                       cfs_rq->curr != NULL, cfs_rq);
 }
 
 /*
@@ -3014,6 +3009,9 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
 void set_task_rq_fair(struct sched_entity *se,
                      struct cfs_rq *prev, struct cfs_rq *next)
 {
+       u64 p_last_update_time;
+       u64 n_last_update_time;
+
        if (!sched_feat(ATTACH_AGE_LOAD))
                return;
 
@@ -3024,11 +3022,11 @@ void set_task_rq_fair(struct sched_entity *se,
         * time. This will result in the wakee task being less decayed, but giving
         * the wakee more load does not sound bad.
         */
-       if (se->avg.last_update_time && prev) {
-               u64 p_last_update_time;
-               u64 n_last_update_time;
+       if (!(se->avg.last_update_time && prev))
+               return;
 
 #ifndef CONFIG_64BIT
+       {
                u64 p_last_update_time_copy;
                u64 n_last_update_time_copy;
 
@@ -3043,14 +3041,13 @@ void set_task_rq_fair(struct sched_entity *se,
 
                } while (p_last_update_time != p_last_update_time_copy ||
                         n_last_update_time != n_last_update_time_copy);
+       }
 #else
-               p_last_update_time = prev->avg.last_update_time;
-               n_last_update_time = next->avg.last_update_time;
+       p_last_update_time = prev->avg.last_update_time;
+       n_last_update_time = next->avg.last_update_time;
 #endif
-               __update_load_avg(p_last_update_time, cpu_of(rq_of(prev)),
-                                 &se->avg, 0, 0, NULL);
-               se->avg.last_update_time = n_last_update_time;
-       }
+       __update_load_avg_blocked_se(p_last_update_time, cpu_of(rq_of(prev)), se);
+       se->avg.last_update_time = n_last_update_time;
 }
 
 /* Take into account change of utilization of a child task group */
@@ -3173,6 +3170,36 @@ static inline int propagate_entity_load_avg(struct sched_entity *se)
        return 1;
 }
 
+/*
+ * Check if we need to update the load and the utilization of a blocked
+ * group_entity:
+ */
+static inline bool skip_blocked_update(struct sched_entity *se)
+{
+       struct cfs_rq *gcfs_rq = group_cfs_rq(se);
+
+        * If the sched_entity still has a non-zero load or utilization, we
+        * have to decay it:
+        * decay it:
+        */
+       if (se->avg.load_avg || se->avg.util_avg)
+               return false;
+
+       /*
+        * If there is a pending propagation, we have to update the load and
+        * the utilization of the sched_entity:
+        */
+       if (gcfs_rq->propagate_avg)
+               return false;
+
+       /*
+        * Otherwise, the load and the utilization of the sched_entity are
+        * already zero and there is no pending propagation, so it will be a
+        * waste of time to try to decay it:
+        */
+       return true;
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
@@ -3265,8 +3292,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
                set_tg_cfs_propagate(cfs_rq);
        }
 
-       decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
-               scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
+       decayed = __update_load_avg_cfs_rq(now, cpu_of(rq_of(cfs_rq)), cfs_rq);
 
 #ifndef CONFIG_64BIT
        smp_wmb();
@@ -3298,11 +3324,8 @@ static inline void update_load_avg(struct sched_entity *se, int flags)
         * Track the task load average for carrying it to the new CPU after migration, and
         * track the group sched_entity load average for the task_h_load calculation in migration
         */
-       if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) {
-               __update_load_avg(now, cpu, &se->avg,
-                         se->on_rq * scale_load_down(se->load.weight),
-                         cfs_rq->curr == se, NULL);
-       }
+       if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
+               __update_load_avg_se(now, cpu, cfs_rq, se);
 
        decayed  = update_cfs_rq_load_avg(now, cfs_rq, true);
        decayed |= propagate_entity_load_avg(se);
@@ -3407,7 +3430,7 @@ void sync_entity_load_avg(struct sched_entity *se)
        u64 last_update_time;
 
        last_update_time = cfs_rq_last_update_time(cfs_rq);
-       __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
+       __update_load_avg_blocked_se(last_update_time, cpu_of(rq_of(cfs_rq)), se);
 }
 
 /*
@@ -4271,8 +4294,9 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
        list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
                                throttled_list) {
                struct rq *rq = rq_of(cfs_rq);
+               struct rq_flags rf;
 
-               raw_spin_lock(&rq->lock);
+               rq_lock(rq, &rf);
                if (!cfs_rq_throttled(cfs_rq))
                        goto next;
 
@@ -4289,7 +4313,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
                        unthrottle_cfs_rq(cfs_rq);
 
 next:
-               raw_spin_unlock(&rq->lock);
+               rq_unlock(rq, &rf);
 
                if (!remaining)
                        break;
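
The raw_spin_lock() conversions in this and the following hunks rely on the rq_lock()/rq_unlock() helper family from kernel/sched/sched.h. A minimal sketch of what those helpers are assumed to expand to (the pin/unpin step is what lets struct rq_flags carry lockdep and clock-update state, which is also why explicit update_rq_clock() calls appear next to several of the new lock sites):

static inline void rq_lock(struct rq *rq, struct rq_flags *rf)
	__acquires(rq->lock)
{
	raw_spin_lock(&rq->lock);
	rq_pin_lock(rq, rf);
}

static inline void rq_unlock(struct rq *rq, struct rq_flags *rf)
	__releases(rq->lock)
{
	rq_unpin_lock(rq, rf);
	raw_spin_unlock(&rq->lock);
}

static inline void rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
	__acquires(rq->lock)
{
	raw_spin_lock_irqsave(&rq->lock, rf->flags);
	rq_pin_lock(rq, rf);
}

static inline void rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
	__releases(rq->lock)
{
	rq_unpin_lock(rq, rf);
	raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
}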
@@ -5097,15 +5121,16 @@ void cpu_load_update_nohz_stop(void)
        unsigned long curr_jiffies = READ_ONCE(jiffies);
        struct rq *this_rq = this_rq();
        unsigned long load;
+       struct rq_flags rf;
 
        if (curr_jiffies == this_rq->last_load_update_tick)
                return;
 
        load = weighted_cpuload(cpu_of(this_rq));
-       raw_spin_lock(&this_rq->lock);
+       rq_lock(this_rq, &rf);
        update_rq_clock(this_rq);
        cpu_load_update_nohz(this_rq, curr_jiffies, load);
-       raw_spin_unlock(&this_rq->lock);
+       rq_unlock(this_rq, &rf);
 }
 #else /* !CONFIG_NO_HZ_COMMON */
 static inline void cpu_load_update_nohz(struct rq *this_rq,
@@ -6769,7 +6794,7 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
        lockdep_assert_held(&env->src_rq->lock);
 
        p->on_rq = TASK_ON_RQ_MIGRATING;
-       deactivate_task(env->src_rq, p, 0);
+       deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
        set_task_cpu(p, env->dst_cpu);
 }
 
@@ -6902,7 +6927,7 @@ static void attach_task(struct rq *rq, struct task_struct *p)
        lockdep_assert_held(&rq->lock);
 
        BUG_ON(task_rq(p) != rq);
-       activate_task(rq, p, 0);
+       activate_task(rq, p, ENQUEUE_NOCLOCK);
        p->on_rq = TASK_ON_RQ_QUEUED;
        check_preempt_curr(rq, p, 0);
 }
@@ -6913,9 +6938,12 @@ static void attach_task(struct rq *rq, struct task_struct *p)
  */
 static void attach_one_task(struct rq *rq, struct task_struct *p)
 {
-       raw_spin_lock(&rq->lock);
+       struct rq_flags rf;
+
+       rq_lock(rq, &rf);
+       update_rq_clock(rq);
        attach_task(rq, p);
-       raw_spin_unlock(&rq->lock);
+       rq_unlock(rq, &rf);
 }
 
 /*
@@ -6926,8 +6954,10 @@ static void attach_tasks(struct lb_env *env)
 {
        struct list_head *tasks = &env->tasks;
        struct task_struct *p;
+       struct rq_flags rf;
 
-       raw_spin_lock(&env->dst_rq->lock);
+       rq_lock(env->dst_rq, &rf);
+       update_rq_clock(env->dst_rq);
 
        while (!list_empty(tasks)) {
                p = list_first_entry(tasks, struct task_struct, se.group_node);
@@ -6936,7 +6966,7 @@ static void attach_tasks(struct lb_env *env)
                attach_task(env->dst_rq, p);
        }
 
-       raw_spin_unlock(&env->dst_rq->lock);
+       rq_unlock(env->dst_rq, &rf);
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -6944,9 +6974,9 @@ static void update_blocked_averages(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
        struct cfs_rq *cfs_rq;
-       unsigned long flags;
+       struct rq_flags rf;
 
-       raw_spin_lock_irqsave(&rq->lock, flags);
+       rq_lock_irqsave(rq, &rf);
        update_rq_clock(rq);
 
        /*
@@ -6954,6 +6984,8 @@ static void update_blocked_averages(int cpu)
         * list_add_leaf_cfs_rq() for details.
         */
        for_each_leaf_cfs_rq(rq, cfs_rq) {
+               struct sched_entity *se;
+
                /* throttled entities do not contribute to load */
                if (throttled_hierarchy(cfs_rq))
                        continue;
@@ -6961,11 +6993,12 @@ static void update_blocked_averages(int cpu)
                if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
                        update_tg_load_avg(cfs_rq, 0);
 
-               /* Propagate pending load changes to the parent */
-               if (cfs_rq->tg->se[cpu])
-                       update_load_avg(cfs_rq->tg->se[cpu], 0);
+               /* Propagate pending load changes to the parent, if any: */
+               se = cfs_rq->tg->se[cpu];
+               if (se && !skip_blocked_update(se))
+                       update_load_avg(se, 0);
        }
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
+       rq_unlock_irqrestore(rq, &rf);
 }
 
 /*
@@ -7019,12 +7052,12 @@ static inline void update_blocked_averages(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
        struct cfs_rq *cfs_rq = &rq->cfs;
-       unsigned long flags;
+       struct rq_flags rf;
 
-       raw_spin_lock_irqsave(&rq->lock, flags);
+       rq_lock_irqsave(rq, &rf);
        update_rq_clock(rq);
        update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true);
-       raw_spin_unlock_irqrestore(&rq->lock, flags);
+       rq_unlock_irqrestore(rq, &rf);
 }
 
 static unsigned long task_h_load(struct task_struct *p)
@@ -7525,6 +7558,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 {
        struct sched_domain *child = env->sd->child;
        struct sched_group *sg = env->sd->groups;
+       struct sg_lb_stats *local = &sds->local_stat;
        struct sg_lb_stats tmp_sgs;
        int load_idx, prefer_sibling = 0;
        bool overload = false;
@@ -7541,7 +7575,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
                local_group = cpumask_test_cpu(env->dst_cpu, sched_group_cpus(sg));
                if (local_group) {
                        sds->local = sg;
-                       sgs = &sds->local_stat;
+                       sgs = local;
 
                        if (env->idle != CPU_NEWLY_IDLE ||
                            time_after_eq(jiffies, sg->sgc->next_update))
@@ -7565,8 +7599,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
                 * the tasks on the system).
                 */
                if (prefer_sibling && sds->local &&
-                   group_has_capacity(env, &sds->local_stat) &&
-                   (sgs->sum_nr_running > 1)) {
+                   group_has_capacity(env, local) &&
+                   (sgs->sum_nr_running > local->sum_nr_running + 1)) {
                        sgs->group_no_capacity = 1;
                        sgs->group_type = group_classify(sg, sgs);
                }
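
Reading of the tightened check: with prefer_sibling, a group used to be flagged as having no spare capacity as soon as it ran more than one task; it is now only flagged when it runs at least two tasks more than the local group (e.g. 4 tasks against a local group running 2), presumably so that a task is only pulled when doing so actually reduces the imbalance rather than merely shifting it onto the local group.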
@@ -8042,7 +8076,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
        struct sched_domain *sd_parent = sd->parent;
        struct sched_group *group;
        struct rq *busiest;
-       unsigned long flags;
+       struct rq_flags rf;
        struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask);
 
        struct lb_env env = {
@@ -8105,7 +8139,7 @@ redo:
                env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
 more_balance:
-               raw_spin_lock_irqsave(&busiest->lock, flags);
+               rq_lock_irqsave(busiest, &rf);
                update_rq_clock(busiest);
 
                /*
@@ -8122,14 +8156,14 @@ more_balance:
                 * See task_rq_lock() family for the details.
                 */
 
-               raw_spin_unlock(&busiest->lock);
+               rq_unlock(busiest, &rf);
 
                if (cur_ld_moved) {
                        attach_tasks(&env);
                        ld_moved += cur_ld_moved;
                }
 
-               local_irq_restore(flags);
+               local_irq_restore(rf.flags);
 
                if (env.flags & LBF_NEED_BREAK) {
                        env.flags &= ~LBF_NEED_BREAK;
@@ -8207,6 +8241,8 @@ more_balance:
                        sd->nr_balance_failed++;
 
                if (need_active_balance(&env)) {
+                       unsigned long flags;
+
                        raw_spin_lock_irqsave(&busiest->lock, flags);
 
                        /* don't kick the active_load_balance_cpu_stop,
@@ -8444,8 +8480,9 @@ static int active_load_balance_cpu_stop(void *data)
        struct rq *target_rq = cpu_rq(target_cpu);
        struct sched_domain *sd;
        struct task_struct *p = NULL;
+       struct rq_flags rf;
 
-       raw_spin_lock_irq(&busiest_rq->lock);
+       rq_lock_irq(busiest_rq, &rf);
 
        /* make sure the requested cpu hasn't gone down in the meantime */
        if (unlikely(busiest_cpu != smp_processor_id() ||
@@ -8496,7 +8533,7 @@ static int active_load_balance_cpu_stop(void *data)
        rcu_read_unlock();
 out_unlock:
        busiest_rq->active_balance = 0;
-       raw_spin_unlock(&busiest_rq->lock);
+       rq_unlock(busiest_rq, &rf);
 
        if (p)
                attach_one_task(target_rq, p);
@@ -8794,10 +8831,13 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
                 * do the balance.
                 */
                if (time_after_eq(jiffies, rq->next_balance)) {
-                       raw_spin_lock_irq(&rq->lock);
+                       struct rq_flags rf;
+
+                       rq_lock_irq(rq, &rf);
                        update_rq_clock(rq);
                        cpu_load_update_idle(rq);
-                       raw_spin_unlock_irq(&rq->lock);
+                       rq_unlock_irq(rq, &rf);
+
                        rebalance_domains(rq, CPU_IDLE);
                }
 
@@ -8988,8 +9028,9 @@ static void task_fork_fair(struct task_struct *p)
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se, *curr;
        struct rq *rq = this_rq();
+       struct rq_flags rf;
 
-       raw_spin_lock(&rq->lock);
+       rq_lock(rq, &rf);
        update_rq_clock(rq);
 
        cfs_rq = task_cfs_rq(current);
@@ -9010,7 +9051,7 @@ static void task_fork_fair(struct task_struct *p)
        }
 
        se->vruntime -= cfs_rq->min_vruntime;
-       raw_spin_unlock(&rq->lock);
+       rq_unlock(rq, &rf);
 }
 
 /*
@@ -9372,7 +9413,6 @@ static DEFINE_MUTEX(shares_mutex);
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
        int i;
-       unsigned long flags;
 
        /*
         * We can't change the weight of the root cgroup.
@@ -9389,19 +9429,17 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
        tg->shares = shares;
        for_each_possible_cpu(i) {
                struct rq *rq = cpu_rq(i);
-               struct sched_entity *se;
+               struct sched_entity *se = tg->se[i];
+               struct rq_flags rf;
 
-               se = tg->se[i];
                /* Propagate contribution to hierarchy */
-               raw_spin_lock_irqsave(&rq->lock, flags);
-
-               /* Possible calls to update_curr() need rq clock */
+               rq_lock_irqsave(rq, &rf);
                update_rq_clock(rq);
                for_each_sched_entity(se) {
                        update_load_avg(se, UPDATE_TG);
                        update_cfs_shares(se);
                }
-               raw_spin_unlock_irqrestore(&rq->lock, flags);
+               rq_unlock_irqrestore(rq, &rf);
        }
 
 done: