Merge tag 'v3.11-rc5' into perf/core
author     Ingo Molnar <mingo@kernel.org>
           Thu, 15 Aug 2013 08:00:09 +0000 (10:00 +0200)
committer  Ingo Molnar <mingo@kernel.org>
           Thu, 15 Aug 2013 08:00:09 +0000 (10:00 +0200)
Merge Linux 3.11-rc5, to sync up with the latest upstream fixes since -rc1.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/powerpc/include/asm/perf_event_server.h
include/linux/sched.h
kernel/sched/fair.c

diff --combined arch/powerpc/include/asm/perf_event_server.h
@@@ -12,6 -12,7 +12,7 @@@
  #include <linux/types.h>
  #include <asm/hw_irq.h>
  #include <linux/device.h>
+ #include <uapi/asm/perf_event.h>
  
  #define MAX_HWEVENTS          8
  #define MAX_EVENT_ALTERNATIVES        8
@@@ -69,11 -70,6 +70,6 @@@ struct power_pmu 
  #define PPMU_LIMITED_PMC_REQD 2       /* have to put this on a limited PMC */
  #define PPMU_ONLY_COUNT_RUN   4       /* only counting in run state */
  
- /*
-  * We use the event config bit 63 as a flag to request EBB.
-  */
- #define EVENT_CONFIG_EBB_SHIFT        63
  extern int register_power_pmu(struct power_pmu *);
  
  struct pt_regs;
@@@ -142,11 -138,11 +138,11 @@@ extern ssize_t power_events_sysfs_show(
  #define       EVENT_PTR(_id, _suffix)         &EVENT_VAR(_id, _suffix).attr.attr
  
  #define       EVENT_ATTR(_name, _id, _suffix)                                 \
 -      PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id,    \
 +      PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id,       \
                        power_events_sysfs_show)
  
  #define       GENERIC_EVENT_ATTR(_name, _id)  EVENT_ATTR(_name, _id, _g)
  #define       GENERIC_EVENT_PTR(_id)          EVENT_PTR(_id, _g)
  
 -#define       POWER_EVENT_ATTR(_name, _id)    EVENT_ATTR(PM_##_name, _id, _p)
 +#define       POWER_EVENT_ATTR(_name, _id)    EVENT_ATTR(_name, _id, _p)
  #define       POWER_EVENT_PTR(_id)            EVENT_PTR(_id, _p)
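
The EVENT_ATTR() change above only drops the PM_ prefix from the pasted token; the
expansion mechanics are unchanged. As a rough illustration of the token pasting
involved (a simplified stand-in with made-up names and values, not the kernel's
real PMU_EVENT_ATTR() machinery):

#include <stdio.h>

/*
 * Simplified model of the event-attribute macros: EVENT_VAR() builds a
 * variable name, and EVENT_ATTR() pastes the event id onto a PME_ prefix
 * (previously PME_PM_) to pick the event code.
 */
#define PME_CYC				0x1e	/* made-up event code */
#define EVENT_VAR(_id, _suffix)		event_attr_##_id##_suffix
#define EVENT_ATTR(_id, _suffix)	\
	static const unsigned long EVENT_VAR(_id, _suffix) = PME_##_id

EVENT_ATTR(CYC, _g);	/* expands to: event_attr_CYC_g = PME_CYC */

int main(void)
{
	printf("CYC event code: 0x%lx\n", event_attr_CYC_g);
	return 0;
}
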
diff --combined include/linux/sched.h
@@@ -1034,9 -1034,6 +1034,9 @@@ struct task_struct 
  #ifdef CONFIG_SMP
        struct llist_node wake_entry;
        int on_cpu;
 +      struct task_struct *last_wakee;
 +      unsigned long wakee_flips;
 +      unsigned long wakee_flip_decay_ts;
  #endif
        int on_rq;
  
@@@ -1631,6 -1628,7 +1631,7 @@@ extern void thread_group_cputime_adjust
  #define PF_MEMPOLICY  0x10000000      /* Non-default NUMA mempolicy */
  #define PF_MUTEX_TESTER       0x20000000      /* Thread belongs to the rt mutex tester */
  #define PF_FREEZER_SKIP       0x40000000      /* Freezer should not count it as freezable */
+ #define PF_SUSPEND_TASK 0x80000000      /* this thread called freeze_processes and should not be frozen */
  
  /*
   * Only the _current_ task can read/write to tsk->flags, but other
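
The three new task_struct fields above (last_wakee, wakee_flips, wakee_flip_decay_ts)
feed the wakee-flip heuristic added to kernel/sched/fair.c below. A minimal userspace
model of the bookkeeping, with hypothetical names and time(NULL) standing in for
jiffies/HZ (a sketch, not kernel code):

#include <stdio.h>
#include <time.h>

/* Userspace model of the new wakee-flip fields in task_struct. */
struct task_model {
	int			id;
	struct task_model	*last_wakee;
	unsigned long		wakee_flips;
	time_t			wakee_flip_decay_ts;
};

/*
 * Mirrors record_wakee(): decay the flip count roughly once per second,
 * then count a "flip" whenever the waker starts waking a different task
 * than last time.
 */
static void record_wakee_model(struct task_model *waker, struct task_model *wakee)
{
	time_t now = time(NULL);

	if (now > waker->wakee_flip_decay_ts + 1) {
		waker->wakee_flips = 0;
		waker->wakee_flip_decay_ts = now;
	}

	if (waker->last_wakee != wakee) {
		waker->last_wakee = wakee;
		waker->wakee_flips++;
	}
}

int main(void)
{
	struct task_model waker = { .id = 1 };
	struct task_model a = { .id = 2 }, b = { .id = 3 };

	for (int i = 0; i < 4; i++) {
		record_wakee_model(&waker, i % 2 ? &a : &b);
		printf("flips after wakeup %d: %lu\n", i + 1, waker.wakee_flips);
	}
	return 0;
}
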
diff --combined kernel/sched/fair.c
@@@ -851,7 -851,7 +851,7 @@@ void task_numa_fault(int node, int page
  {
        struct task_struct *p = current;
  
-       if (!sched_feat_numa(NUMA))
+       if (!numabalancing_enabled)
                return;
  
        /* FIXME: Allocate task-specific structure for placement policy here */
@@@ -3017,23 -3017,6 +3017,23 @@@ static unsigned long cpu_avg_load_per_t
        return 0;
  }
  
 +static void record_wakee(struct task_struct *p)
 +{
 +      /*
 +       * Roughly decay (wipe) the flip count about once a second to keep
 +       * the bookkeeping cheap; don't worry about the exact boundary, a
 +       * really active task won't care about the loss.
 +       */
 +      if (jiffies > current->wakee_flip_decay_ts + HZ) {
 +              current->wakee_flips = 0;
 +              current->wakee_flip_decay_ts = jiffies;
 +      }
 +
 +      if (current->last_wakee != p) {
 +              current->last_wakee = p;
 +              current->wakee_flips++;
 +      }
 +}
  
  static void task_waking_fair(struct task_struct *p)
  {
  #endif
  
        se->vruntime -= min_vruntime;
 +      record_wakee(p);
  }
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
@@@ -3173,28 -3155,6 +3173,28 @@@ static inline unsigned long effective_l
  
  #endif
  
 +static int wake_wide(struct task_struct *p)
 +{
 +      int factor = this_cpu_read(sd_llc_size);
 +
 +      /*
 +       * wakee_flips is the switching frequency: a high value can mean
 +       * many wakees or rapid switching.  Using the LLC size as the
 +       * factor automatically adjusts how loose the cutoff is, so a
 +       * bigger node tolerates more flips before pulling stops.
 +       */
 +      if (p->wakee_flips > factor) {
 +              /*
 +               * The wakee is somewhat hot and needs a certain amount of
 +               * CPU; if the waker is far hotter still, prefer to leave
 +               * the wakee where it is.
 +               */
 +              if (current->wakee_flips > (factor * p->wakee_flips))
 +                      return 1;
 +      }
 +
 +      return 0;
 +}
 +
  static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
  {
        s64 this_load, load;
        unsigned long weight;
        int balanced;
  
 +      /*
 +       * If we wake multiple tasks, be careful not to bounce
 +       * ourselves around too much.
 +       */
 +      if (wake_wide(p))
 +              return 0;
 +
        idx       = sd->wake_idx;
        this_cpu  = smp_processor_id();
        prev_cpu  = task_cpu(p);
@@@ -4218,48 -4171,47 +4218,48 @@@ static void update_blocked_averages(in
  }
  
  /*
 - * Compute the cpu's hierarchical load factor for each task group.
 + * Compute the hierarchical load factor for cfs_rq and all its ancestors.
   * This needs to be done in a top-down fashion because the load of a child
   * group is a fraction of its parent's load.
   */
 -static int tg_load_down(struct task_group *tg, void *data)
 -{
 -      unsigned long load;
 -      long cpu = (long)data;
 -
 -      if (!tg->parent) {
 -              load = cpu_rq(cpu)->avg.load_avg_contrib;
 -      } else {
 -              load = tg->parent->cfs_rq[cpu]->h_load;
 -              load = div64_ul(load * tg->se[cpu]->avg.load_avg_contrib,
 -                              tg->parent->cfs_rq[cpu]->runnable_load_avg + 1);
 -      }
 -
 -      tg->cfs_rq[cpu]->h_load = load;
 -
 -      return 0;
 -}
 -
 -static void update_h_load(long cpu)
 +static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
  {
 -      struct rq *rq = cpu_rq(cpu);
 +      struct rq *rq = rq_of(cfs_rq);
 +      struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];
        unsigned long now = jiffies;
 +      unsigned long load;
  
 -      if (rq->h_load_throttle == now)
 +      if (cfs_rq->last_h_load_update == now)
                return;
  
 -      rq->h_load_throttle = now;
 +      cfs_rq->h_load_next = NULL;
 +      for_each_sched_entity(se) {
 +              cfs_rq = cfs_rq_of(se);
 +              cfs_rq->h_load_next = se;
 +              if (cfs_rq->last_h_load_update == now)
 +                      break;
 +      }
  
 -      rcu_read_lock();
 -      walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 -      rcu_read_unlock();
 +      if (!se) {
 +              cfs_rq->h_load = rq->avg.load_avg_contrib;
 +              cfs_rq->last_h_load_update = now;
 +      }
 +
 +      while ((se = cfs_rq->h_load_next) != NULL) {
 +              load = cfs_rq->h_load;
 +              load = div64_ul(load * se->avg.load_avg_contrib,
 +                              cfs_rq->runnable_load_avg + 1);
 +              cfs_rq = group_cfs_rq(se);
 +              cfs_rq->h_load = load;
 +              cfs_rq->last_h_load_update = now;
 +      }
  }
  
  static unsigned long task_h_load(struct task_struct *p)
  {
        struct cfs_rq *cfs_rq = task_cfs_rq(p);
  
 +      update_cfs_rq_h_load(cfs_rq);
        return div64_ul(p->se.avg.load_avg_contrib * cfs_rq->h_load,
                        cfs_rq->runnable_load_avg + 1);
  }
@@@ -4268,6 -4220,10 +4268,6 @@@ static inline void update_blocked_avera
  {
  }
  
 -static inline void update_h_load(long cpu)
 -{
 -}
 -
  static unsigned long task_h_load(struct task_struct *p)
  {
        return p->se.avg.load_avg_contrib;
@@@ -5152,6 -5108,7 +5152,6 @@@ redo
                env.src_rq    = busiest;
                env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
  
 -              update_h_load(env.src_cpu);
  more_balance:
                local_irq_save(flags);
                double_rq_lock(env.dst_rq, busiest);
@@@ -5829,7 -5786,7 +5829,7 @@@ static void task_tick_fair(struct rq *r
                entity_tick(cfs_rq, se, queued);
        }
  
-       if (sched_feat_numa(NUMA))
+       if (numabalancing_enabled)
                task_tick_numa(rq, curr);
  
        update_rq_runnable_avg(rq, 1);
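
For reference, the reworked task_h_load() path above replaces the per-cpu
walk_tg_tree() pass with an on-demand, top-down walk over just the affected
hierarchy. A rough standalone model of the idea, with hypothetical types and
load_avg standing in for avg.load_avg_contrib (a sketch, not the kernel
implementation): walk up recording a child back-pointer at each level, then walk
back down scaling each level's h_load by its share of its parent's runnable load.

#include <stdio.h>

/*
 * Toy cfs_rq hierarchy: h_load at each level is the root load scaled by
 * that level's fraction of its parent's runnable load.
 */
struct cfs_model {
	struct cfs_model	*parent;
	struct cfs_model	*h_load_next;	/* child recorded on the way up */
	unsigned long		load_avg;	/* this group's own load contribution */
	unsigned long		runnable_load;	/* total runnable load queued at this level */
	unsigned long		h_load;
};

static void update_h_load_model(struct cfs_model *cfs)
{
	struct cfs_model *p;

	cfs->h_load_next = NULL;

	/* Walk up, leaving back-pointers so we can come down the same path. */
	for (p = cfs; p->parent; p = p->parent)
		p->parent->h_load_next = p;

	/* p is now the root; its hierarchical load is just its own load. */
	p->h_load = p->runnable_load;

	/* Walk back down, scaling by each level's share of its parent. */
	while ((p = p->h_load_next) != NULL)
		p->h_load = p->parent->h_load * p->load_avg /
			    (p->parent->runnable_load + 1);
}

int main(void)
{
	struct cfs_model root  = { .runnable_load = 1000 };
	struct cfs_model group = { .parent = &root,  .load_avg = 400,
				   .runnable_load = 200 };
	struct cfs_model leaf  = { .parent = &group, .load_avg = 100 };

	update_h_load_model(&leaf);

	/* 1000 * 400/1001 = 399, then 399 * 100/201 = 198 (integer math) */
	printf("leaf h_load = %lu\n", leaf.h_load);
	return 0;
}
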