Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 5c38db5..10bff55 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -287,7 +287,6 @@ extern void trap_init(void);
  extern void account_process_tick(struct task_struct *task, int user);
  extern void update_process_times(int user);
  extern void scheduler_tick(void);
-extern void hrtick_resched(void);
  
  extern void sched_show_task(struct task_struct *p);
  
@@ -1665,6 +1664,7 @@ extern unsigned int sysctl_sched_features;
  extern unsigned int sysctl_sched_migration_cost;
  extern unsigned int sysctl_sched_nr_migrate;
  extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
  
  int sched_nr_latency_handler(struct ctl_table *table, int write,
                 struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c

index d906f72..945a97b 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -819,6 +819,13 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
  unsigned int sysctl_sched_shares_ratelimit = 250000;
  
  /*
+ * Inject some fuzzyness into changing the per-cpu group shares
+ * this avoids remote rq-locks at the expense of fairness.
+ * default: 4
+ */
+unsigned int sysctl_sched_shares_thresh = 4;
+
+/*
   * period over which we measure -rt task cpu usage in us.
   * default: 1s
   */
@@ -1454,8 +1461,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
   * Calculate and set the cpu's group shares.
   */
  static void
-__update_group_shares_cpu(struct task_group *tg, int cpu,
-                         unsigned long sd_shares, unsigned long sd_rq_weight)
+update_group_shares_cpu(struct task_group *tg, int cpu,
+                       unsigned long sd_shares, unsigned long sd_rq_weight)
  {
         int boost = 0;
         unsigned long shares;
@@ -1486,19 +1493,23 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
          *
          */
         shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+       shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
  
-       /*
-        * record the actual number of shares, not the boosted amount.
-        */
-       tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-       tg->cfs_rq[cpu]->rq_weight = rq_weight;
+       if (abs(shares - tg->se[cpu]->load.weight) >
+                       sysctl_sched_shares_thresh) {
+               struct rq *rq = cpu_rq(cpu);
+               unsigned long flags;
  
-       if (shares < MIN_SHARES)
-               shares = MIN_SHARES;
-       else if (shares > MAX_SHARES)
-               shares = MAX_SHARES;
+               spin_lock_irqsave(&rq->lock, flags);
+               /*
+                * record the actual number of shares, not the boosted amount.
+                */
+               tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+               tg->cfs_rq[cpu]->rq_weight = rq_weight;
  
-       __set_se_shares(tg->se[cpu], shares);
+               __set_se_shares(tg->se[cpu], shares);
+               spin_unlock_irqrestore(&rq->lock, flags);
+       }
  }
  
  /*
@@ -1527,14 +1538,8 @@ static int tg_shares_up(struct task_group *tg, void *data)
         if (!rq_weight)
                 rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
  
-       for_each_cpu_mask(i, sd->span) {
-               struct rq *rq = cpu_rq(i);
-               unsigned long flags;
-
-               spin_lock_irqsave(&rq->lock, flags);
-               __update_group_shares_cpu(tg, i, shares, rq_weight);
-               spin_unlock_irqrestore(&rq->lock, flags);
-       }
+       for_each_cpu_mask(i, sd->span)
+               update_group_shares_cpu(tg, i, shares, rq_weight);
  
         return 0;
  }
@@ -4443,12 +4448,8 @@ need_resched_nonpreemptible:
         if (sched_feat(HRTICK))
                 hrtick_clear(rq);
  
-       /*
-        * Do the rq-clock update outside the rq lock:
-        */
-       local_irq_disable();
+       spin_lock_irq(&rq->lock);
         update_rq_clock(rq);
-       spin_lock(&rq->lock);
         clear_tsk_need_resched(prev);
  
         if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index f604dae..9573c33 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,6 +73,8 @@ unsigned int sysctl_sched_wakeup_granularity = 5000000UL;
  
  const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
  
+static const struct sched_class fair_sched_class;
+
  /**************************************************************
   * CFS operations on generic schedulable entities:
   */
@@ -334,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
  #endif
  
  /*
- * delta *= w / rw
+ * delta *= P[w / rw]
   */
  static inline unsigned long
  calc_delta_weight(unsigned long delta, struct sched_entity *se)
@@ -348,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se)
  }
  
  /*
- * delta *= rw / w
+ * delta /= w
   */
  static inline unsigned long
  calc_delta_fair(unsigned long delta, struct sched_entity *se)
  {
-       for_each_sched_entity(se) {
-               delta = calc_delta_mine(delta,
-                               cfs_rq_of(se)->load.weight, &se->load);
-       }
+       if (unlikely(se->load.weight != NICE_0_LOAD))
+               delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
  
         return delta;
  }
@@ -386,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running)
   * We calculate the wall-time slice from the period by taking a part
   * proportional to the weight.
   *
- * s = p*w/rw
+ * s = p*P[w/rw]
   */
  static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
-       return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
+       unsigned long nr_running = cfs_rq->nr_running;
+
+       if (unlikely(!se->on_rq))
+               nr_running++;
+
+       return calc_delta_weight(__sched_period(nr_running), se);
  }
  
  /*
   * We calculate the vruntime slice of a to be inserted task
   *
- * vs = s*rw/w = p
+ * vs = s/w
   */
-static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
-       unsigned long nr_running = cfs_rq->nr_running;
-
-       if (!se->on_rq)
-               nr_running++;
-
-       return __sched_period(nr_running);
+       return calc_delta_fair(sched_slice(cfs_rq, se), se);
  }
  
  /*
@@ -628,7 +628,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
          * stays open at the end.
          */
         if (initial && sched_feat(START_DEBIT))
-               vruntime += sched_vslice_add(cfs_rq, se);
+               vruntime += sched_vslice(cfs_rq, se);
  
         if (!initial) {
                 /* sleeps upto a single latency don't count. */
@@ -748,7 +748,7 @@ pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
         struct rq *rq = rq_of(cfs_rq);
         u64 pair_slice = rq->clock - cfs_rq->pair_start;
  
-       if (!cfs_rq->next || pair_slice > sched_slice(cfs_rq, cfs_rq->next)) {
+       if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
                 cfs_rq->pair_start = rq->clock;
                 return se;
         }
@@ -849,11 +849,31 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
                 hrtick_start(rq, delta);
         }
  }
+
+/*
+ * called from enqueue/dequeue and updates the hrtick when the
+ * current task is from our class and nr_running is low enough
+ * to matter.
+ */
+static void hrtick_update(struct rq *rq)
+{
+       struct task_struct *curr = rq->curr;
+
+       if (curr->sched_class != &fair_sched_class)
+               return;
+
+       if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
+               hrtick_start_fair(rq, curr);
+}
  #else /* !CONFIG_SCHED_HRTICK */
  static inline void
  hrtick_start_fair(struct rq *rq, struct task_struct *p)
  {
  }
+
+static inline void hrtick_update(struct rq *rq)
+{
+}
  #endif
  
  /*
@@ -874,7 +894,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
                 wakeup = 1;
         }
  
-       hrtick_start_fair(rq, rq->curr);
+       hrtick_update(rq);
  }
  
  /*
@@ -896,7 +916,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
                 sleep = 1;
         }
  
-       hrtick_start_fair(rq, rq->curr);
+       hrtick_update(rq);
  }
  
  /*
@@ -1002,8 +1022,6 @@ static inline int wake_idle(int cpu, struct task_struct *p)
  
  #ifdef CONFIG_SMP
  
-static const struct sched_class fair_sched_class;
-
  #ifdef CONFIG_FAIR_GROUP_SCHED
  /*
   * effective_load() calculates the load change as seen from the root_task_group
diff --git a/kernel/sched_features.h b/kernel/sched_features.h

index 7c9e8f4..fda0162 100644 (file)
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -5,7 +5,7 @@ SCHED_FEAT(START_DEBIT, 1)
  SCHED_FEAT(AFFINE_WAKEUPS, 1)
  SCHED_FEAT(CACHE_HOT_BUDDY, 1)
  SCHED_FEAT(SYNC_WAKEUPS, 1)
-SCHED_FEAT(HRTICK, 1)
+SCHED_FEAT(HRTICK, 0)
  SCHED_FEAT(DOUBLE_TICK, 0)
  SCHED_FEAT(ASYM_GRAN, 1)
  SCHED_FEAT(LB_BIAS, 1)
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h

index b8c1569..2df9d29 100644 (file)
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -9,7 +9,7 @@
  static int show_schedstat(struct seq_file *seq, void *v)
  {
         int cpu;
-       int mask_len = NR_CPUS/32 * 9;
+       int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
         char *mask_str = kmalloc(mask_len, GFP_KERNEL);
  
         if (mask_str == NULL)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index b3cc739..a13bd4d 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -276,6 +276,16 @@ static struct ctl_table kern_table[] = {
         },
         {
                 .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "sched_shares_thresh",
+               .data           = &sysctl_sched_shares_thresh,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &zero,
+       },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
                 .procname       = "sched_child_runs_first",
                 .data           = &sysctl_sched_child_runs_first,
                 .maxlen         = sizeof(unsigned int),
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 23 Oct 2008 16:37:16 +0000 (09:37 -0700)
include/linux/sched.h		patch | blob | history
kernel/sched.c		patch | blob | history
kernel/sched_fair.c		patch | blob | history
kernel/sched_features.h		patch | blob | history
kernel/sched_stats.h		patch | blob | history
kernel/sysctl.c		patch | blob | history