sched: Scale down cpu_power due to RT tasks

author Peter Zijlstra <a.p.zijlstra@chello.nl>

Tue, 1 Sep 2009 08:34:37 +0000 (10:34 +0200)

committer Ingo Molnar <mingo@elte.hu>

Fri, 4 Sep 2009 08:09:55 +0000 (10:09 +0200)
author Peter Zijlstra <a.p.zijlstra@chello.nl>
Tue, 1 Sep 2009 08:34:37 +0000 (10:34 +0200)
committer Ingo Molnar <mingo@elte.hu>
Fri, 4 Sep 2009 08:09:55 +0000 (10:09 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 9c81c92..c67ddf3 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1831,6 +1831,7 @@ extern unsigned int sysctl_sched_child_runs_first;
  extern unsigned int sysctl_sched_features;
  extern unsigned int sysctl_sched_migration_cost;
  extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_time_avg;
  extern unsigned int sysctl_timer_migration;
  
  int sched_nr_latency_handler(struct ctl_table *table, int write,
diff --git a/kernel/sched.c b/kernel/sched.c

index 036600f..ab532b5 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -627,6 +627,9 @@ struct rq {
  
         struct task_struct *migration_thread;
         struct list_head migration_queue;
+
+       u64 rt_avg;
+       u64 age_stamp;
  #endif
  
         /* calc_load related fields */
@@ -863,6 +866,14 @@ unsigned int sysctl_sched_shares_ratelimit = 250000;
  unsigned int sysctl_sched_shares_thresh = 4;
  
  /*
+ * period over which we average the RT time consumption, measured
+ * in ms.
+ *
+ * default: 1s
+ */
+const_debug unsigned int sysctl_sched_time_avg = MSEC_PER_SEC;
+
+/*
   * period over which we measure -rt task cpu usage in us.
   * default: 1s
   */
@@ -1280,12 +1291,37 @@ void wake_up_idle_cpu(int cpu)
  }
  #endif /* CONFIG_NO_HZ */
  
+/*
+ * Length of one ageing period: sysctl_sched_time_avg (milliseconds)
+ * converted via NSEC_PER_MSEC and then halved.
+ */
+static u64 sched_avg_period(void)
+{
+       u64 window = (u64)sysctl_sched_time_avg * NSEC_PER_MSEC;
+
+       return window / 2;
+}
+
+/*
+ * Age rq->rt_avg: for every whole period that lies between rq->age_stamp
+ * and rq->clock, advance age_stamp by one period and halve rt_avg.
+ */
+static void sched_avg_update(struct rq *rq)
+{
+       s64 period = sched_avg_period();
+
+       /*
+        * sysctl_sched_time_avg is writable at run time; a value of 0
+        * gives a zero period, and the loop below would then never move
+        * age_stamp forward and would spin forever. Skip ageing instead.
+        */
+       if (unlikely(period <= 0))
+               return;
+
+       while ((s64)(rq->clock - rq->age_stamp) > period) {
+               rq->age_stamp += period;
+               rq->rt_avg /= 2;
+       }
+}
+
+/*
+ * Add @rt_delta to rq->rt_avg, then age the accumulated value through
+ * sched_avg_update().
+ */
+static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+       rq->rt_avg += rt_delta;
+       sched_avg_update(rq);
+}
+
  #else /* !CONFIG_SMP */
  static void resched_task(struct task_struct *p)
  {
         assert_spin_locked(&task_rq(p)->lock);
         set_tsk_need_resched(p);
  }
+
+/* !CONFIG_SMP variant: intentionally does nothing with @rq or @rt_delta. */
+static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
+{
+}
  #endif /* CONFIG_SMP */
  
  #if BITS_PER_LONG == 32
@@ -3699,7 +3735,7 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
  }
  #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
  
-unsigned long __weak arch_smt_gain(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
  {
         unsigned long weight = cpumask_weight(sched_domain_span(sd));
         unsigned long smt_gain = sd->smt_gain;
@@ -3709,6 +3745,24 @@ unsigned long __weak arch_smt_gain(struct sched_domain *sd, int cpu)
         return smt_gain;
  }
  
+/*
+ * Ratio of time not accounted in rq->rt_avg to the total averaging
+ * window of @cpu's runqueue.
+ *
+ * Returns available / (total >> SCHED_LOAD_SHIFT), i.e. the ratio as a
+ * fixed-point value with SCHED_LOAD_SHIFT fractional bits. May return 0.
+ */
+unsigned long scale_rt_power(int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+       u64 total, available;
+
+       sched_avg_update(rq);
+
+       total = sched_avg_period() + (rq->clock - rq->age_stamp);
+
+       /*
+        * Nothing here guarantees rt_avg <= total. Clamp so the unsigned
+        * subtraction cannot wrap around and report a huge bogus value.
+        */
+       if (unlikely(total < rq->rt_avg))
+               available = 0;
+       else
+               available = total - rq->rt_avg;
+
+       /* Keep the divisor non-zero after the shift below. */
+       if (unlikely((s64)total < SCHED_LOAD_SCALE))
+               total = SCHED_LOAD_SCALE;
+
+       total >>= SCHED_LOAD_SHIFT;
+
+       return div_u64(available, total);
+}
+
  static void update_cpu_power(struct sched_domain *sd, int cpu)
  {
         unsigned long weight = cpumask_weight(sched_domain_span(sd));
@@ -3719,11 +3773,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
         /* here we could scale based on cpufreq */
  
         if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-               power *= arch_smt_gain(sd, cpu);
+               power *= arch_scale_smt_power(sd, cpu);
                 power >>= SCHED_LOAD_SHIFT;
         }
  
-       /* here we could scale based on RT time */
+       power *= scale_rt_power(cpu);
+       power >>= SCHED_LOAD_SHIFT;
+
+       if (!power)
+               power = 1;
  
         if (power != old) {
                 sdg->__cpu_power = power;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c

index 3d4020a..2eb4bd6 100644 (file)
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -615,6 +615,8 @@ static void update_curr_rt(struct rq *rq)
         curr->se.exec_start = rq->clock;
         cpuacct_charge(curr, delta_exec);
  
+       sched_rt_avg_update(rq, delta_exec);
+
         if (!rt_bandwidth_enabled())
                 return;
  
@@ -887,8 +889,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
  
         if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
                 enqueue_pushable_task(rq, p);
-
-       inc_cpu_load(rq, p->se.load.weight);
  }
  
  static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -899,8 +899,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
         dequeue_rt_entity(rt_se);
  
         dequeue_pushable_task(rq, p);
-
-       dec_cpu_load(rq, p->se.load.weight);
  }
  
  /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index 58be760..6c9836e 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -332,6 +332,14 @@ static struct ctl_table kern_table[] = {
         },
         {
                 .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "sched_time_avg",
+               .data           = &sysctl_sched_time_avg,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
                 .procname       = "timer_migration",
                 .data           = &sysctl_timer_migration,
                 .maxlen         = sizeof(unsigned int),
author	Peter Zijlstra <a.p.zijlstra@chello.nl>
	Tue, 1 Sep 2009 08:34:37 +0000 (10:34 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 4 Sep 2009 08:09:55 +0000 (10:09 +0200)
include/linux/sched.h		patch \| blob \| history
kernel/sched.c		patch \| blob \| history
kernel/sched_rt.c		patch \| blob \| history
kernel/sysctl.c		patch \| blob \| history