sched: clean up code under CONFIG_FAIR_GROUP_SCHED

author Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>

Mon, 15 Oct 2007 15:00:09 +0000 (17:00 +0200)

committer Ingo Molnar <mingo@elte.hu>

Mon, 15 Oct 2007 15:00:09 +0000 (17:00 +0200)
author Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Mon, 15 Oct 2007 15:00:09 +0000 (17:00 +0200)
committer Ingo Molnar <mingo@elte.hu>
Mon, 15 Oct 2007 15:00:09 +0000 (17:00 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 6616900..03c13b6 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -136,6 +136,7 @@ extern unsigned long weighted_cpuload(const int cpu);
  
  struct seq_file;
  struct cfs_rq;
+struct task_grp;
  #ifdef CONFIG_SCHED_DEBUG
  extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
  extern void proc_sched_set_task(struct task_struct *p);
@@ -1834,6 +1835,17 @@ extern int sched_mc_power_savings, sched_smt_power_savings;
  
  extern void normalize_rt_tasks(void);
  
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+extern struct task_grp init_task_grp;
+
+extern struct task_grp *sched_create_group(void);
+extern void sched_destroy_group(struct task_grp *tg);
+extern void sched_move_task(struct task_struct *tsk);
+extern int sched_group_set_shares(struct task_grp *tg, unsigned long shares);
+
+#endif
+
  #ifdef CONFIG_TASK_XACCT
  static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
  {
diff --git a/init/Kconfig b/init/Kconfig

index 11c6762..ef90a15 100644 (file)
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -282,13 +282,12 @@ config CPUSETS
           Say N if unsure.
  
  config FAIR_GROUP_SCHED
-       bool "Fair group scheduler"
-       depends on EXPERIMENTAL && CONTAINERS
+       bool "Fair group cpu scheduler"
+       default n
+       depends on EXPERIMENTAL
         help
-         This option enables you to group tasks and control CPU resource
-         allocation to such groups.
-
-         Say N if unsure.
+         This feature lets cpu scheduler recognize task groups and control cpu
+         bandwidth allocation to such task groups.
  
  config SYSFS_DEPRECATED
         bool "Create deprecated sysfs files"
diff --git a/kernel/sched.c b/kernel/sched.c

index ee7ac71..e10c403 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -173,13 +173,10 @@ struct rt_prio_array {
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
-#include <linux/container.h>
-
  struct cfs_rq;
  
  /* task group related information */
  struct task_grp {
-       struct container_subsys_state css;
         /* schedulable entities of this group on each cpu */
         struct sched_entity **se;
         /* runqueue "owned" by this group on each cpu */
@@ -192,22 +189,28 @@ static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
  /* Default task group's cfs_rq on each cpu */
  static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
  
-static struct sched_entity *init_sched_entity_p[CONFIG_NR_CPUS];
-static struct cfs_rq *init_cfs_rq_p[CONFIG_NR_CPUS];
+static struct sched_entity *init_sched_entity_p[NR_CPUS];
+static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
  
  /* Default task group.
   *     Every task in system belong to this group at bootup.
   */
-static struct task_grp init_task_grp =  {
-                                       .se     = init_sched_entity_p,
-                                       .cfs_rq = init_cfs_rq_p,
-                                       };
+struct task_grp init_task_grp =  {
+                               .se     = init_sched_entity_p,
+                               .cfs_rq = init_cfs_rq_p,
+                                };
+
+#define INIT_TASK_GRP_LOAD     NICE_0_LOAD
+static int init_task_grp_load = INIT_TASK_GRP_LOAD;
  
  /* return group to which a task belongs */
  static inline struct task_grp *task_grp(struct task_struct *p)
  {
-       return container_of(task_subsys_state(p, cpu_subsys_id),
-                               struct task_grp, css);
+       struct task_grp *tg;
+
+       tg  = &init_task_grp;
+
+       return tg;
  }
  
  /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -250,6 +253,7 @@ struct cfs_rq {
          */
         struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */
         struct task_grp *tg;    /* group that "owns" this runqueue */
+       struct rcu_head rcu;
  #endif
  };
  
@@ -6513,11 +6517,12 @@ void __init sched_init(void)
                         init_sched_entity_p[i] = se;
                         se->cfs_rq = &rq->cfs;
                         se->my_q = cfs_rq;
-                       se->load.weight = NICE_0_LOAD;
-                       se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD);
+                       se->load.weight = init_task_grp_load;
+                       se->load.inv_weight =
+                                div64_64(1ULL<<32, init_task_grp_load);
                         se->parent = NULL;
                 }
-               init_task_grp.shares = NICE_0_LOAD;
+               init_task_grp.shares = init_task_grp_load;
  #endif
  
                 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -6707,45 +6712,28 @@ void set_curr_task(int cpu, struct task_struct *p)
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
-/* return corresponding task_grp object of a container */
-static inline struct task_grp *container_tg(struct container *cont)
-{
-       return container_of(container_subsys_state(cont, cpu_subsys_id),
-                                        struct task_grp, css);
-}
-
  /* allocate runqueue etc for a new task group */
-static struct container_subsys_state *
-sched_create_group(struct container_subsys *ss, struct container *cont)
+struct task_grp *sched_create_group(void)
  {
         struct task_grp *tg;
         struct cfs_rq *cfs_rq;
         struct sched_entity *se;
+       struct rq *rq;
         int i;
  
-       if (!cont->parent) {
-               /* This is early initialization for the top container */
-               init_task_grp.css.container = cont;
-               return &init_task_grp.css;
-       }
-
-       /* we support only 1-level deep hierarchical scheduler atm */
-       if (cont->parent->parent)
-               return ERR_PTR(-EINVAL);
-
         tg = kzalloc(sizeof(*tg), GFP_KERNEL);
         if (!tg)
                 return ERR_PTR(-ENOMEM);
  
-       tg->cfs_rq = kzalloc(sizeof(cfs_rq) * num_possible_cpus(), GFP_KERNEL);
+       tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
         if (!tg->cfs_rq)
                 goto err;
-       tg->se = kzalloc(sizeof(se) * num_possible_cpus(), GFP_KERNEL);
+       tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
         if (!tg->se)
                 goto err;
  
         for_each_possible_cpu(i) {
-               struct rq *rq = cpu_rq(i);
+               rq = cpu_rq(i);
  
                 cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
                                                          cpu_to_node(i));
@@ -6763,7 +6751,6 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
                 tg->cfs_rq[i] = cfs_rq;
                 init_cfs_rq(cfs_rq, rq);
                 cfs_rq->tg = tg;
-               list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
  
                 tg->se[i] = se;
                 se->cfs_rq = &rq->cfs;
@@ -6773,12 +6760,15 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
                 se->parent = NULL;
         }
  
-       tg->shares = NICE_0_LOAD;
+       for_each_possible_cpu(i) {
+               rq = cpu_rq(i);
+               cfs_rq = tg->cfs_rq[i];
+               list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+       }
  
-       /* Bind the container to task_grp object we just created */
-       tg->css.container = cont;
+       tg->shares = NICE_0_LOAD;
  
-       return &tg->css;
+       return tg;
  
  err:
         for_each_possible_cpu(i) {
@@ -6797,24 +6787,14 @@ err:
         return ERR_PTR(-ENOMEM);
  }
  
-
-/* destroy runqueue etc associated with a task group */
-static void sched_destroy_group(struct container_subsys *ss,
-                                       struct container *cont)
+/* rcu callback to free various structures associated with a task group */
+static void free_sched_group(struct rcu_head *rhp)
  {
-       struct task_grp *tg = container_tg(cont);
-       struct cfs_rq *cfs_rq;
+       struct cfs_rq *cfs_rq = container_of(rhp, struct cfs_rq, rcu);
+       struct task_grp *tg = cfs_rq->tg;
         struct sched_entity *se;
         int i;
  
-       for_each_possible_cpu(i) {
-               cfs_rq = tg->cfs_rq[i];
-               list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-       }
-
-       /* wait for possible concurrent references to cfs_rqs complete */
-       synchronize_sched();
-
         /* now it should be safe to free those cfs_rqs */
         for_each_possible_cpu(i) {
                 cfs_rq = tg->cfs_rq[i];
@@ -6829,19 +6809,29 @@ static void sched_destroy_group(struct container_subsys *ss,
         kfree(tg);
  }
  
-static int sched_can_attach(struct container_subsys *ss,
-                            struct container *cont, struct task_struct *tsk)
+/* Destroy runqueue etc associated with a task group */
+void sched_destroy_group(struct task_grp *tg)
  {
-       /* We don't support RT-tasks being in separate groups */
-       if (tsk->sched_class != &fair_sched_class)
-               return -EINVAL;
+       struct cfs_rq *cfs_rq;
+       int i;
  
-       return 0;
+       for_each_possible_cpu(i) {
+               cfs_rq = tg->cfs_rq[i];
+               list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+       }
+
+       cfs_rq = tg->cfs_rq[0];
+
+       /* wait for possible concurrent references to cfs_rqs complete */
+       call_rcu(&cfs_rq->rcu, free_sched_group);
  }
  
-/* change task's runqueue when it moves between groups */
-static void sched_move_task(struct container_subsys *ss, struct container *cont,
-                       struct container *old_cont, struct task_struct *tsk)
+/* change task's runqueue when it moves between groups.
+ *     The caller of this function should have put the task in its new group
+ *     by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
+ *     reflect its new group.
+ */
+void sched_move_task(struct task_struct *tsk)
  {
         int on_rq, running;
         unsigned long flags;
@@ -6896,58 +6886,20 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
         spin_unlock_irq(&rq->lock);
  }
  
-static ssize_t cpu_shares_write(struct container *cont, struct cftype *cftype,
-                               struct file *file, const char __user *userbuf,
-                               size_t nbytes, loff_t *ppos)
+int sched_group_set_shares(struct task_grp *tg, unsigned long shares)
  {
         int i;
-       unsigned long shareval;
-       struct task_grp *tg = container_tg(cont);
-       char buffer[2*sizeof(unsigned long) + 1];
-
-       if (nbytes > 2*sizeof(unsigned long))   /* safety check */
-               return -E2BIG;
  
-       if (copy_from_user(buffer, userbuf, nbytes))
-               return -EFAULT;
+       if (tg->shares == shares)
+               return 0;
  
-       buffer[nbytes] = 0;     /* nul-terminate */
-       shareval = simple_strtoul(buffer, NULL, 10);
+       /* return -EINVAL if the new value is not sane */
  
-       tg->shares = shareval;
+       tg->shares = shares;
         for_each_possible_cpu(i)
-               set_se_shares(tg->se[i], shareval);
-
-       return nbytes;
-}
-
-static u64 cpu_shares_read_uint(struct container *cont, struct cftype *cft)
-{
-       struct task_grp *tg = container_tg(cont);
-
-       return (u64) tg->shares;
-}
+               set_se_shares(tg->se[i], shares);
  
-struct cftype cpuctl_share = {
-       .name = "shares",
-       .read_uint = cpu_shares_read_uint,
-       .write = cpu_shares_write,
-};
-
-static int sched_populate(struct container_subsys *ss, struct container *cont)
-{
-       return container_add_file(cont, ss, &cpuctl_share);
+       return 0;
  }
  
-struct container_subsys cpu_subsys = {
-       .name = "cpu",
-       .create = sched_create_group,
-       .destroy  = sched_destroy_group,
-       .can_attach = sched_can_attach,
-       .attach = sched_move_task,
-       .populate = sched_populate,
-       .subsys_id = cpu_subsys_id,
-       .early_init = 1,
-};
-
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif         /* CONFIG_FAIR_GROUP_SCHED */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c

index 556942c..abd65ed 100644 (file)
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -877,7 +877,10 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
         if (!cfs_rq->nr_running)
                 return MAX_PRIO;
  
-       curr = __pick_next_entity(cfs_rq);
+       curr = cfs_rq->curr;
+       if (!curr)
+               curr = __pick_next_entity(cfs_rq);
+
         p = task_of(curr);
  
         return p->prio;
author	Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
	Mon, 15 Oct 2007 15:00:09 +0000 (17:00 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Mon, 15 Oct 2007 15:00:09 +0000 (17:00 +0200)
include/linux/sched.h		patch \| blob \| history
init/Kconfig		patch \| blob \| history
kernel/sched.c		patch \| blob \| history
kernel/sched_fair.c		patch \| blob \| history