Merge tag 'v3.11-rc5' into perf/core
author     Ingo Molnar <mingo@kernel.org>
           Thu, 15 Aug 2013 08:00:09 +0000 (10:00 +0200)
committer  Ingo Molnar <mingo@kernel.org>
           Thu, 15 Aug 2013 08:00:09 +0000 (10:00 +0200)
Merge Linux 3.11-rc5, to sync up with the latest upstream fixes since -rc1.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/powerpc/include/asm/perf_event_server.h
include/linux/sched.h
kernel/sched/fair.c

diff --combined arch/powerpc/include/asm/perf_event_server.h
@@@ -12,6 -12,7 +12,7 @@@
  #include <linux/types.h>
  #include <asm/hw_irq.h>
  #include <linux/device.h>
+ #include <uapi/asm/perf_event.h>
  
  #define MAX_HWEVENTS          8
  #define MAX_EVENT_ALTERNATIVES        8
@@@ -69,11 -70,6 +70,6 @@@ struct power_pmu 
  #define PPMU_LIMITED_PMC_REQD 2       /* have to put this on a limited PMC */
  #define PPMU_ONLY_COUNT_RUN   4       /* only counting in run state */
  
- /*
-  * We use the event config bit 63 as a flag to request EBB.
-  */
- #define EVENT_CONFIG_EBB_SHIFT        63
  extern int register_power_pmu(struct power_pmu *);
  
  struct pt_regs;
@@@ -142,11 -138,11 +138,11 @@@ extern ssize_t power_events_sysfs_show(
  #define       EVENT_PTR(_id, _suffix)         &EVENT_VAR(_id, _suffix).attr.attr
  
  #define       EVENT_ATTR(_name, _id, _suffix)                                 \
 -      PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id,    \
 +      PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id,       \
                        power_events_sysfs_show)
  
  #define       GENERIC_EVENT_ATTR(_name, _id)  EVENT_ATTR(_name, _id, _g)
  #define       GENERIC_EVENT_PTR(_id)          EVENT_PTR(_id, _g)
  
 -#define       POWER_EVENT_ATTR(_name, _id)    EVENT_ATTR(PM_##_name, _id, _p)
 +#define       POWER_EVENT_ATTR(_name, _id)    EVENT_ATTR(_name, _id, _p)
  #define       POWER_EVENT_PTR(_id)            EVENT_PTR(_id, _p)
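
The EVENT_ATTR() change above only drops the PM_ prefix from the pasted token; the
expansion mechanics are unchanged. As a rough illustration of the token pasting
involved (a simplified stand-in with made-up names and values, not the kernel's
real PMU_EVENT_ATTR() machinery):

#include <stdio.h>

/*
 * Simplified model of the event-attribute macros: EVENT_VAR() builds a
 * variable name, and EVENT_ATTR() pastes the event id onto a PME_ prefix
 * (previously PME_PM_) to pick the event code.
 */
#define PME_CYC				0x1e	/* made-up event code */
#define EVENT_VAR(_id, _suffix)		event_attr_##_id##_suffix
#define EVENT_ATTR(_id, _suffix)	\
	static const unsigned long EVENT_VAR(_id, _suffix) = PME_##_id

EVENT_ATTR(CYC, _g);	/* expands to: event_attr_CYC_g = PME_CYC */

int main(void)
{
	printf("CYC event code: 0x%lx\n", event_attr_CYC_g);
	return 0;
}
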
diff --combined include/linux/sched.h
@@@ -1034,9 -1034,6 +1034,9 @@@ struct task_struct 
  #ifdef CONFIG_SMP
        struct llist_node wake_entry;
        int on_cpu;
 +      struct task_struct *last_wakee;
 +      unsigned long wakee_flips;
 +      unsigned long wakee_flip_decay_ts;
  #endif
        int on_rq;
  
@@@ -1631,6 -1628,7 +1631,7 @@@ extern void thread_group_cputime_adjust
  #define PF_MEMPOLICY  0x10000000      /* Non-default NUMA mempolicy */
  #define PF_MUTEX_TESTER       0x20000000      /* Thread belongs to the rt mutex tester */
  #define PF_FREEZER_SKIP       0x40000000      /* Freezer should not count it as freezable */
+ #define PF_SUSPEND_TASK 0x80000000      /* this thread called freeze_processes and should not be frozen */
  
  /*
   * Only the _current_ task can read/write to tsk->flags, but other
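
The three new task_struct fields above (last_wakee, wakee_flips, wakee_flip_decay_ts)
feed the wakee-flip heuristic added to kernel/sched/fair.c below. A minimal userspace
model of the bookkeeping, with hypothetical names and time(NULL) standing in for
jiffies/HZ (a sketch, not kernel code):

#include <stdio.h>
#include <time.h>

/* Userspace model of the new wakee-flip fields in task_struct. */
struct task_model {
	int			id;
	struct task_model	*last_wakee;
	unsigned long		wakee_flips;
	time_t			wakee_flip_decay_ts;
};

/*
 * Mirrors record_wakee(): decay the flip count roughly once per second,
 * then count a "flip" whenever the waker starts waking a different task
 * than last time.
 */
static void record_wakee_model(struct task_model *waker, struct task_model *wakee)
{
	time_t now = time(NULL);

	if (now > waker->wakee_flip_decay_ts + 1) {
		waker->wakee_flips = 0;
		waker->wakee_flip_decay_ts = now;
	}

	if (waker->last_wakee != wakee) {
		waker->last_wakee = wakee;
		waker->wakee_flips++;
	}
}

int main(void)
{
	struct task_model waker = { .id = 1 };
	struct task_model a = { .id = 2 }, b = { .id = 3 };

	for (int i = 0; i < 4; i++) {
		record_wakee_model(&waker, i % 2 ? &a : &b);
		printf("flips after wakeup %d: %lu\n", i + 1, waker.wakee_flips);
	}
	return 0;
}
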
diff --combined kernel/sched/fair.c
@@@ -851,7 -851,7 +851,7 @@@ void task_numa_fault(int node, int page
  {
        struct task_struct *p = current;
  
-       if (!sched_feat_numa(NUMA))
+       if (!numabalancing_enabled)
                return;
  
        /* FIXME: Allocate task-specific structure for placement policy here */
@@@ -3017,23 -3017,6 +3017,23 @@@ static unsigned long cpu_avg_load_per_t
        return 0;
  }
  
 +static void record_wakee(struct task_struct *p)
 +{
 +      /*
 +       * Roughly decay (wipe) the flip count about once a second to keep
 +       * the bookkeeping cheap; don't worry about the exact boundary, a
 +       * really active task won't care about the loss.
 +       */
 +      if (jiffies > current->wakee_flip_decay_ts + HZ) {
 +              current->wakee_flips = 0;
 +              current->wakee_flip_decay_ts = jiffies;
 +      }
 +
 +      if (current->last_wakee != p) {
 +              current->last_wakee = p;
 +              current->wakee_flips++;
 +      }
 +}
  
  static void task_waking_fair(struct task_struct *p)
  {
  #endif
  
        se->vruntime -= min_vruntime;
 +      record_wakee(p);
  }
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
@@@ -3173,28 -3155,6 +3173,28 @@@ static inline unsigned long effective_l
  
  #endif
  
 +static int wake_wide(struct task_struct *p)
 +{
 +      int factor = this_cpu_read(sd_llc_size);
 +
 +      /*
 +       * wakee_flips is the switching frequency: a high value can mean
 +       * many wakees or rapid switching.  Using the LLC size as the
 +       * factor automatically adjusts how loose the cutoff is, so a
 +       * bigger node tolerates more flips before pulling stops.
 +       */
 +      if (p->wakee_flips > factor) {
 +              /*
 +               * The wakee is somewhat hot and needs a certain amount of
 +               * CPU; if the waker is far hotter still, prefer to leave
 +               * the wakee where it is.
 +               */
 +              if (current->wakee_flips > (factor * p->wakee_flips))
 +                      return 1;
 +      }
 +
 +      return 0;
 +}
 +
  static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
  {
        s64 this_load, load;
        unsigned long weight;
        int balanced;
  
 +      /*
 +       * If we wake multiple tasks, be careful not to bounce
 +       * ourselves around too much.
 +       */
 +      if (wake_wide(p))
 +              return 0;
 +
        idx       = sd->wake_idx;
        this_cpu  = smp_processor_id();
        prev_cpu  = task_cpu(p);
@@@ -4218,48 -4171,47 +4218,48 @@@ static void update_blocked_averages(in
  }
  
  /*
 - * Compute the cpu's hierarchical load factor for each task group.
 + * Compute the hierarchical load factor for cfs_rq and all its ancestors.
   * This needs to be done in a top-down fashion because the load of a child
   * group is a fraction of its parent's load.
   */
 -static int tg_load_down(struct task_group *tg, void *data)
 -{
 -      unsigned long load;
 -      long cpu = (long)data;
 -
 -      if (!tg->parent) {
 -              load = cpu_rq(cpu)->avg.load_avg_contrib;
 -      } else {
 -              load = tg->parent->cfs_rq[cpu]->h_load;
 -              load = div64_ul(load * tg->se[cpu]->avg.load_avg_contrib,
 -                              tg->parent->cfs_rq[cpu]->runnable_load_avg + 1);
 -      }
 -
 -      tg->cfs_rq[cpu]->h_load = load;
 -
 -      return 0;
 -}
 -
 -static void update_h_load(long cpu)
 +static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
  {
 -      struct rq *rq = cpu_rq(cpu);
 +      struct rq *rq = rq_of(cfs_rq);
 +      struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];
        unsigned long now = jiffies;
 +      unsigned long load;
  
 -      if (rq->h_load_throttle == now)
 +      if (cfs_rq->last_h_load_update == now)
                return;
  
 -      rq->h_load_throttle = now;
 +      cfs_rq->h_load_next = NULL;
 +      for_each_sched_entity(se) {
 +              cfs_rq = cfs_rq_of(se);
 +              cfs_rq->h_load_next = se;
 +              if (cfs_rq->last_h_load_update == now)
 +                      break;
 +      }
  
 -      rcu_read_lock();
 -      walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 -      rcu_read_unlock();
 +      if (!se) {
 +              cfs_rq->h_load = rq->avg.load_avg_contrib;
 +              cfs_rq->last_h_load_update = now;
 +      }
 +
 +      while ((se = cfs_rq->h_load_next) != NULL) {
 +              load = cfs_rq->h_load;
 +              load = div64_ul(load * se->avg.load_avg_contrib,
 +                              cfs_rq->runnable_load_avg + 1);
 +              cfs_rq = group_cfs_rq(se);
 +              cfs_rq->h_load = load;
 +              cfs_rq->last_h_load_update = now;
 +      }
  }
  
  static unsigned long task_h_load(struct task_struct *p)
  {
        struct cfs_rq *cfs_rq = task_cfs_rq(p);
  
 +      update_cfs_rq_h_load(cfs_rq);
        return div64_ul(p->se.avg.load_avg_contrib * cfs_rq->h_load,
                        cfs_rq->runnable_load_avg + 1);
  }
@@@ -4268,6 -4220,10 +4268,6 @@@ static inline void update_blocked_avera
  {
  }
  
 -static inline void update_h_load(long cpu)
 -{
 -}
 -
  static unsigned long task_h_load(struct task_struct *p)
  {
        return p->se.avg.load_avg_contrib;
@@@ -5152,6 -5108,7 +5152,6 @@@ redo
                env.src_rq    = busiest;
                env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
  
 -              update_h_load(env.src_cpu);
  more_balance:
                local_irq_save(flags);
                double_rq_lock(env.dst_rq, busiest);
@@@ -5829,7 -5786,7 +5829,7 @@@ static void task_tick_fair(struct rq *r
                entity_tick(cfs_rq, se, queued);
        }
  
-       if (sched_feat_numa(NUMA))
+       if (numabalancing_enabled)
                task_tick_numa(rq, curr);
  
        update_rq_runnable_avg(rq, 1);
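
For reference, the reworked task_h_load() path above replaces the per-cpu
walk_tg_tree() pass with an on-demand, top-down walk over just the affected
hierarchy. A rough standalone model of the idea, with hypothetical types and
load_avg standing in for avg.load_avg_contrib (a sketch, not the kernel
implementation): walk up recording a child back-pointer at each level, then walk
back down scaling each level's h_load by its share of its parent's runnable load.

#include <stdio.h>

/*
 * Toy cfs_rq hierarchy: h_load at each level is the root load scaled by
 * that level's fraction of its parent's runnable load.
 */
struct cfs_model {
	struct cfs_model	*parent;
	struct cfs_model	*h_load_next;	/* child recorded on the way up */
	unsigned long		load_avg;	/* this group's own load contribution */
	unsigned long		runnable_load;	/* total runnable load queued at this level */
	unsigned long		h_load;
};

static void update_h_load_model(struct cfs_model *cfs)
{
	struct cfs_model *p;

	cfs->h_load_next = NULL;

	/* Walk up, leaving back-pointers so we can come down the same path. */
	for (p = cfs; p->parent; p = p->parent)
		p->parent->h_load_next = p;

	/* p is now the root; its hierarchical load is just its own load. */
	p->h_load = p->runnable_load;

	/* Walk back down, scaling by each level's share of its parent. */
	while ((p = p->h_load_next) != NULL)
		p->h_load = p->parent->h_load * p->load_avg /
			    (p->parent->runnable_load + 1);
}

int main(void)
{
	struct cfs_model root  = { .runnable_load = 1000 };
	struct cfs_model group = { .parent = &root,  .load_avg = 400,
				   .runnable_load = 200 };
	struct cfs_model leaf  = { .parent = &group, .load_avg = 100 };

	update_h_load_model(&leaf);

	/* 1000 * 400/1001 = 399, then 399 * 100/201 = 198 (integer math) */
	printf("leaf h_load = %lu\n", leaf.h_load);
	return 0;
}
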