Merge branches 'sched/domains' and 'sched/clock' into sched/core

author Ingo Molnar <mingo@elte.hu>
Fri, 4 Sep 2009 08:08:43 +0000 (10:08 +0200)
committer Ingo Molnar <mingo@elte.hu>
Fri, 4 Sep 2009 08:08:47 +0000 (10:08 +0200)
diff --combined kernel/sched.c

index 38d05a8,cf4c953,1b59e26..da1edc8
--- 1/kernel/sched.c
--- 2/kernel/sched.c
--- 3/kernel/sched.c
+++ b/kernel/sched.c
@@@@ -309,8 -309,8 -309,8 +309,8 @@@@ void set_tg_uid(struct user_struct *use
    
    /*
     * Root task group.
- -- *   Every UID task group (including init_task_group aka UID-0) will
- -- *   be a child to this group.
+ ++ *   Every UID task group (including init_task_group aka UID-0) will
+ ++ *   be a child to this group.
     */
    struct task_group root_task_group;
    
@@@@ -318,7 -318,7 -318,7 +318,7 @@@@
    /* Default task group's sched entity on each cpu */
    static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
    /* Default task group's cfs_rq on each cpu */
- --static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
+ ++static DEFINE_PER_CPU(struct cfs_rq, init_tg_cfs_rq) ____cacheline_aligned_in_smp;
    #endif /* CONFIG_FAIR_GROUP_SCHED */
    
    #ifdef CONFIG_RT_GROUP_SCHED
@@@@ -616,7 -616,6 -616,6 +616,7 @@@@ struct rq 
    
         unsigned char idle_at_tick;
         /* For active balancing */
+ ++     int post_schedule;
         int active_balance;
         int push_cpu;
         /* cpu of this runqueue: */
@@@@ -694,7 -693,6 -693,6 +694,7 @@@@ static inline int cpu_of(struct rq *rq
    #define this_rq()            (&__get_cpu_var(runqueues))
    #define task_rq(p)           cpu_rq(task_cpu(p))
    #define cpu_curr(cpu)                (cpu_rq(cpu)->curr)
+ ++#define raw_rq()             (&__raw_get_cpu_var(runqueues))
    
    inline void update_rq_clock(struct rq *rq)
    {
@@@@ -1515,35 -1513,28 -1513,28 +1515,35 @@@@ static unsigned long cpu_avg_load_per_t
    
    #ifdef CONFIG_FAIR_GROUP_SCHED
    
+ ++struct update_shares_data {
+ ++     unsigned long rq_weight[NR_CPUS];
+ ++};
+ ++
+ ++static DEFINE_PER_CPU(struct update_shares_data, update_shares_data);
+ ++
    static void __set_se_shares(struct sched_entity *se, unsigned long shares);
    
    /*
     * Calculate and set the cpu's group shares.
     */
- --static void
- --update_group_shares_cpu(struct task_group *tg, int cpu,
- --                     unsigned long sd_shares, unsigned long sd_rq_weight)
+ ++static void update_group_shares_cpu(struct task_group *tg, int cpu,
+ ++                                 unsigned long sd_shares,
+ ++                                 unsigned long sd_rq_weight,
+ ++                                 struct update_shares_data *usd)
    {
- --     unsigned long shares;
- --     unsigned long rq_weight;
- --
- --     if (!tg->se[cpu])
- --             return;
+ ++     unsigned long shares, rq_weight;
+ ++     int boost = 0;
    
- --     rq_weight = tg->cfs_rq[cpu]->rq_weight;
+ ++     rq_weight = usd->rq_weight[cpu];
+ ++     if (!rq_weight) {
+ ++             boost = 1;
+ ++             rq_weight = NICE_0_LOAD;
+ ++     }
    
         /*
- --      *           \Sum shares * rq_weight
- --      * shares =  -----------------------
- --      *               \Sum rq_weight
- --      *
+ ++      *             \Sum_j shares_j * rq_weight_i
+ ++      * shares_i =  -----------------------------
+ ++      *                  \Sum_j rq_weight_j
          */
         shares = (sd_shares * rq_weight) / sd_rq_weight;
         shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
@@@@ -1554,8 -1545,8 -1545,8 +1554,8 @@@@
                 unsigned long flags;
    
                 spin_lock_irqsave(&rq->lock, flags);
- --             tg->cfs_rq[cpu]->shares = shares;
- --
+ ++             tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight;
+ ++             tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
                 __set_se_shares(tg->se[cpu], shares);
                 spin_unlock_irqrestore(&rq->lock, flags);
         }
@@@@ -1568,30 -1559,22 -1559,22 +1568,30 @@@@
     */
    static int tg_shares_up(struct task_group *tg, void *data)
    {
- --     unsigned long weight, rq_weight = 0;
- --     unsigned long shares = 0;
+ ++     unsigned long weight, rq_weight = 0, shares = 0;
+ ++     struct update_shares_data *usd;
         struct sched_domain *sd = data;
+ ++     unsigned long flags;
         int i;
    
+ ++     if (!tg->se[0])
+ ++             return 0;
+ ++
+ ++     local_irq_save(flags);
+ ++     usd = &__get_cpu_var(update_shares_data);
+ ++
         for_each_cpu(i, sched_domain_span(sd)) {
+ ++             weight = tg->cfs_rq[i]->load.weight;
+ ++             usd->rq_weight[i] = weight;
+ ++
                 /*
                  * If there are currently no tasks on the cpu pretend there
                  * is one of average load so that when a new task gets to
                  * run here it will not get delayed by group starvation.
                  */
- --             weight = tg->cfs_rq[i]->load.weight;
                 if (!weight)
                         weight = NICE_0_LOAD;
    
- --             tg->cfs_rq[i]->rq_weight = weight;
                 rq_weight += weight;
                 shares += tg->cfs_rq[i]->shares;
         }
@@@@ -1603,9 -1586,7 -1586,7 +1603,9 @@@@
                 shares = tg->shares;
    
         for_each_cpu(i, sched_domain_span(sd))
- --             update_group_shares_cpu(tg, i, shares, rq_weight);
+ ++             update_group_shares_cpu(tg, i, shares, rq_weight, usd);
+ ++
+ ++     local_irq_restore(flags);
    
         return 0;
    }
@@@@ -1635,14 -1616,8 -1616,8 +1635,14 @@@@ static int tg_load_down(struct task_gro
    
    static void update_shares(struct sched_domain *sd)
    {
- --     u64 now = cpu_clock(raw_smp_processor_id());
- --     s64 elapsed = now - sd->last_update;
+ ++     s64 elapsed;
+ ++     u64 now;
+ ++
+ ++     if (root_task_group_empty())
+ ++             return;
+ ++
+ ++     now = cpu_clock(raw_smp_processor_id());
+ ++     elapsed = now - sd->last_update;
    
         if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
                 sd->last_update = now;
@@@@ -1652,9 -1627,6 -1627,6 +1652,9 @@@@
    
    static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
    {
+ ++     if (root_task_group_empty())
+ ++             return;
+ ++
         spin_unlock(&rq->lock);
         update_shares(sd);
         spin_lock(&rq->lock);
@@@@ -1662,9 -1634,6 -1634,6 +1662,9 @@@@
    
    static void update_h_load(long cpu)
    {
+ ++     if (root_task_group_empty())
+ ++             return;
+ ++
         walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
    }
    
@@@@ -2668,32 -2637,9 -2637,9 +2668,32 @@@@ void sched_fork(struct task_struct *p, 
         set_task_cpu(p, cpu);
    
         /*
- --      * Make sure we do not leak PI boosting priority to the child:
+ ++      * Make sure we do not leak PI boosting priority to the child.
          */
         p->prio = current->normal_prio;
+ ++
+ ++     /*
+ ++      * Revert to default priority/policy on fork if requested.
+ ++      */
+ ++     if (unlikely(p->sched_reset_on_fork)) {
+ ++             if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
+ ++                     p->policy = SCHED_NORMAL;
+ ++
+ ++             if (p->normal_prio < DEFAULT_PRIO)
+ ++                     p->prio = DEFAULT_PRIO;
+ ++
+ ++             if (PRIO_TO_NICE(p->static_prio) < 0) {
+ ++                     p->static_prio = NICE_TO_PRIO(0);
+ ++                     set_load_weight(p);
+ ++             }
+ ++
+ ++             /*
+ ++              * We don't need the reset flag anymore after the fork. It has
+ ++              * fulfilled its duty:
+ ++              */
+ ++             p->sched_reset_on_fork = 0;
+ ++     }
+ ++
         if (!rt_prio(p->prio))
                 p->sched_class = &fair_sched_class;
    
@@@@ -2850,6 -2796,12 -2796,12 +2850,6 @@@@ static void finish_task_switch(struct r
    {
         struct mm_struct *mm = rq->prev_mm;
         long prev_state;
- --#ifdef CONFIG_SMP
- --     int post_schedule = 0;
- --
- --     if (current->sched_class->needs_post_schedule)
- --             post_schedule = current->sched_class->needs_post_schedule(rq);
- --#endif
    
         rq->prev_mm = NULL;
    
@@@@ -2868,6 -2820,10 -2820,10 +2868,6 @@@@
         finish_arch_switch(prev);
         perf_counter_task_sched_in(current, cpu_of(rq));
         finish_lock_switch(rq, prev);
- --#ifdef CONFIG_SMP
- --     if (post_schedule)
- --             current->sched_class->post_schedule(rq);
- --#endif
    
         fire_sched_in_preempt_notifiers(current);
         if (mm)
@@@@ -2882,42 -2838,6 -2838,6 +2882,42 @@@@
         }
    }
    
+ ++#ifdef CONFIG_SMP
+ ++
+ ++/* assumes rq->lock is held */
+ ++static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
+ ++{
+ ++     if (prev->sched_class->pre_schedule)
+ ++             prev->sched_class->pre_schedule(rq, prev);
+ ++}
+ ++
+ ++/* rq->lock is NOT held, but preemption is disabled */
+ ++static inline void post_schedule(struct rq *rq)
+ ++{
+ ++     if (rq->post_schedule) {
+ ++             unsigned long flags;
+ ++
+ ++             spin_lock_irqsave(&rq->lock, flags);
+ ++             if (rq->curr->sched_class->post_schedule)
+ ++                     rq->curr->sched_class->post_schedule(rq);
+ ++             spin_unlock_irqrestore(&rq->lock, flags);
+ ++
+ ++             rq->post_schedule = 0;
+ ++     }
+ ++}
+ ++
+ ++#else
+ ++
+ ++static inline void pre_schedule(struct rq *rq, struct task_struct *p)
+ ++{
+ ++}
+ ++
+ ++static inline void post_schedule(struct rq *rq)
+ ++{
+ ++}
+ ++
+ ++#endif
+ ++
    /**
     * schedule_tail - first thing a freshly forked thread must call.
     * @prev: the thread we just switched away from.
@@@@ -2928,13 -2848,6 -2848,6 +2928,13 @@@@ asmlinkage void schedule_tail(struct ta
         struct rq *rq = this_rq();
    
         finish_task_switch(rq, prev);
+ ++
+ ++     /*
+ ++      * FIXME: do we need to worry about rq being invalidated by the
+ ++      * task_switch?
+ ++      */
+ ++     post_schedule(rq);
+ ++
    #ifdef __ARCH_WANT_UNLOCKED_CTXSW
         /* In this case, finish_task_switch does not reenable preemption */
         preempt_enable();
@@@@ -3466,10 -3379,9 -3379,9 +3466,10 @@@@ static int move_one_task(struct rq *thi
    {
         const struct sched_class *class;
    
- --     for (class = sched_class_highest; class; class = class->next)
+ ++     for_each_class(class) {
                 if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle))
                         return 1;
+ ++     }
    
         return 0;
    }
@@@@ -5437,7 -5349,10 -5349,10 +5437,7 @@@@ need_resched_nonpreemptible
                 switch_count = &prev->nvcsw;
         }
    
- --#ifdef CONFIG_SMP
- --     if (prev->sched_class->pre_schedule)
- --             prev->sched_class->pre_schedule(rq, prev);
- --#endif
+ ++     pre_schedule(rq, prev);
    
         if (unlikely(!rq->nr_running))
                 idle_balance(cpu, rq);
@@@@ -5463,8 -5378,6 -5378,6 +5463,8 @@@@
         } else
                 spin_unlock_irq(&rq->lock);
    
+ ++     post_schedule(rq);
+ ++
         if (unlikely(reacquire_kernel_lock(current) < 0))
                 goto need_resched_nonpreemptible;
    
@@@@ -6210,25 -6123,17 -6123,17 +6210,25 @@@@ static int __sched_setscheduler(struct 
         unsigned long flags;
         const struct sched_class *prev_class = p->sched_class;
         struct rq *rq;
+ ++     int reset_on_fork;
    
         /* may grab non-irq protected spin_locks */
         BUG_ON(in_interrupt());
    recheck:
         /* double check policy once rq lock held */
- --     if (policy < 0)
+ ++     if (policy < 0) {
+ ++             reset_on_fork = p->sched_reset_on_fork;
                 policy = oldpolicy = p->policy;
- --     else if (policy != SCHED_FIFO && policy != SCHED_RR &&
- --                     policy != SCHED_NORMAL && policy != SCHED_BATCH &&
- --                     policy != SCHED_IDLE)
- --             return -EINVAL;
+ ++     } else {
+ ++             reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
+ ++             policy &= ~SCHED_RESET_ON_FORK;
+ ++
+ ++             if (policy != SCHED_FIFO && policy != SCHED_RR &&
+ ++                             policy != SCHED_NORMAL && policy != SCHED_BATCH &&
+ ++                             policy != SCHED_IDLE)
+ ++                     return -EINVAL;
+ ++     }
+ ++
         /*
          * Valid priorities for SCHED_FIFO and SCHED_RR are
          * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
@@@@ -6272,10 -6177,6 -6177,6 +6272,10 @@@@
                 /* can't change other user's priorities */
                 if (!check_same_owner(p))
                         return -EPERM;
+ ++
+ ++             /* Normal users shall not reset the sched_reset_on_fork flag */
+ ++             if (p->sched_reset_on_fork && !reset_on_fork)
+ ++                     return -EPERM;
         }
    
         if (user) {
@@@@ -6319,8 -6220,6 -6220,6 +6319,8 @@@@
         if (running)
                 p->sched_class->put_prev_task(rq, p);
    
+ ++     p->sched_reset_on_fork = reset_on_fork;
+ ++
         oldprio = p->prio;
         __setscheduler(rq, p, policy, param->sched_priority);
    
@@@@ -6437,15 -6336,14 -6336,14 +6437,15 @@@@ SYSCALL_DEFINE1(sched_getscheduler, pid
         if (p) {
                 retval = security_task_getscheduler(p);
                 if (!retval)
- --                     retval = p->policy;
+ ++                     retval = p->policy
+ ++                             | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
         }
         read_unlock(&tasklist_lock);
         return retval;
    }
    
    /**
- -- * sys_sched_getscheduler - get the RT priority of a thread
+ ++ * sys_sched_getparam - get the RT priority of a thread
     * @pid: the pid in question.
     * @param: structure containing the RT priority.
     */
@@@@ -6673,9 -6571,19 -6571,19 +6673,9 @@@@ static inline int should_resched(void
    
    static void __cond_resched(void)
    {
- --#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
- --     __might_sleep(__FILE__, __LINE__);
- --#endif
- --     /*
- --      * The BKS might be reacquired before we have dropped
- --      * PREEMPT_ACTIVE, which could trigger a second
- --      * cond_resched() call.
- --      */
- --     do {
- --             add_preempt_count(PREEMPT_ACTIVE);
- --             schedule();
- --             sub_preempt_count(PREEMPT_ACTIVE);
- --     } while (need_resched());
+ ++     add_preempt_count(PREEMPT_ACTIVE);
+ ++     schedule();
+ ++     sub_preempt_count(PREEMPT_ACTIVE);
    }
    
    int __sched _cond_resched(void)
@@@@ -6689,14 -6597,14 -6597,14 +6689,14 @@@@
    EXPORT_SYMBOL(_cond_resched);
    
    /*
- -- * cond_resched_lock() - if a reschedule is pending, drop the given lock,
+ ++ * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
     * call schedule, and on return reacquire the lock.
     *
     * This works OK both with and without CONFIG_PREEMPT. We do strange low-level
     * operations here to prevent schedule() from being called twice (once via
     * spin_unlock(), once by hand).
     */
- --int cond_resched_lock(spinlock_t *lock)
+ ++int __cond_resched_lock(spinlock_t *lock)
    {
         int resched = should_resched();
         int ret = 0;
@@@@ -6712,9 -6620,9 -6620,9 +6712,9 @@@@
         }
         return ret;
    }
- --EXPORT_SYMBOL(cond_resched_lock);
+ ++EXPORT_SYMBOL(__cond_resched_lock);
    
- --int __sched cond_resched_softirq(void)
+ ++int __sched __cond_resched_softirq(void)
    {
         BUG_ON(!in_softirq());
    
@@@@ -6726,7 -6634,7 -6634,7 +6726,7 @@@@
         }
         return 0;
    }
- --EXPORT_SYMBOL(cond_resched_softirq);
+ ++EXPORT_SYMBOL(__cond_resched_softirq);
    
    /**
     * yield - yield the current processor to other threads.
@@@@ -6750,13 -6658,11 -6658,11 +6750,13 @@@@ EXPORT_SYMBOL(yield)
     */
    void __sched io_schedule(void)
    {
- --     struct rq *rq = &__raw_get_cpu_var(runqueues);
+ ++     struct rq *rq = raw_rq();
    
         delayacct_blkio_start();
         atomic_inc(&rq->nr_iowait);
+ ++     current->in_iowait = 1;
         schedule();
+ ++     current->in_iowait = 0;
         atomic_dec(&rq->nr_iowait);
         delayacct_blkio_end();
    }
@@@@ -6764,14 -6670,12 -6670,12 +6764,14 @@@@ EXPORT_SYMBOL(io_schedule)
    
    long __sched io_schedule_timeout(long timeout)
    {
- --     struct rq *rq = &__raw_get_cpu_var(runqueues);
+ ++     struct rq *rq = raw_rq();
         long ret;
    
         delayacct_blkio_start();
         atomic_inc(&rq->nr_iowait);
+ ++     current->in_iowait = 1;
         ret = schedule_timeout(timeout);
+ ++     current->in_iowait = 0;
         atomic_dec(&rq->nr_iowait);
         delayacct_blkio_end();
         return ret;
@@@@ -7088,12 -6992,8 -6992,8 +7088,12 @@@@ int set_cpus_allowed_ptr(struct task_st
    
         if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
                 /* Need help from migration thread: drop lock and wait. */
+ ++             struct task_struct *mt = rq->migration_thread;
+ ++
+ ++             get_task_struct(mt);
                 task_rq_unlock(rq, &flags);
                 wake_up_process(rq->migration_thread);
+ ++             put_task_struct(mt);
                 wait_for_completion(&req.done);
                 tlb_migrate_finish(p->mm);
                 return 0;
@@@@ -7725,7 -7625,7 -7625,7 +7725,7 @@@@ static int __init migration_init(void
         migration_call(&migration_notifier, CPU_ONLINE, cpu);
         register_cpu_notifier(&migration_notifier);
    
- --     return err;
+ ++     return 0;
    }
    early_initcall(migration_init);
    #endif
@@@@ -7941,7 -7841,7 -7841,7 +7941,7 @@@@ static void rq_attach_root(struct rq *r
         rq->rd = rd;
    
         cpumask_set_cpu(rq->cpu, rd->span);
- --     if (cpumask_test_cpu(rq->cpu, cpu_online_mask))
+ ++     if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
                 set_rq_online(rq);
    
         spin_unlock_irqrestore(&rq->lock, flags);
@@@@ -8191,6 -8091,39 -8091,6 +8191,39 @@@@ struct static_sched_domain 
         DECLARE_BITMAP(span, CONFIG_NR_CPUS);
    };
    
+ +struct s_data {
+ +#ifdef CONFIG_NUMA
+ +     int                     sd_allnodes;
+ +     cpumask_var_t           domainspan;
+ +     cpumask_var_t           covered;
+ +     cpumask_var_t           notcovered;
+ +#endif
+ +     cpumask_var_t           nodemask;
+ +     cpumask_var_t           this_sibling_map;
+ +     cpumask_var_t           this_core_map;
+ +     cpumask_var_t           send_covered;
+ +     cpumask_var_t           tmpmask;
+ +     struct sched_group      **sched_group_nodes;
+ +     struct root_domain      *rd;
+ +};
+ +
+ +enum s_alloc {
+ +     sa_sched_groups = 0,
+ +     sa_rootdomain,
+ +     sa_tmpmask,
+ +     sa_send_covered,
+ +     sa_this_core_map,
+ +     sa_this_sibling_map,
+ +     sa_nodemask,
+ +     sa_sched_group_nodes,
+ +#ifdef CONFIG_NUMA
+ +     sa_notcovered,
+ +     sa_covered,
+ +     sa_domainspan,
+ +#endif
+ +     sa_none,
+ +};
+ +
    /*
     * SMT sched-domains:
     */
@@@@ -8313,6 -8246,71 -8213,6 +8346,71 @@@@ static void init_numa_sched_groups_powe
                 sg = sg->next;
         } while (sg != group_head);
    }
+ +
+ +static int build_numa_sched_groups(struct s_data *d,
+ +                                const struct cpumask *cpu_map, int num)
+ +{
+ +     struct sched_domain *sd;
+ +     struct sched_group *sg, *prev;
+ +     int n, j;
+ +
+ +     cpumask_clear(d->covered);
+ +     cpumask_and(d->nodemask, cpumask_of_node(num), cpu_map);
+ +     if (cpumask_empty(d->nodemask)) {
+ +             d->sched_group_nodes[num] = NULL;
+ +             goto out;
+ +     }
+ +
+ +     sched_domain_node_span(num, d->domainspan);
+ +     cpumask_and(d->domainspan, d->domainspan, cpu_map);
+ +
+ +     sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
+ +                       GFP_KERNEL, num);
+ +     if (!sg) {
+ +             printk(KERN_WARNING "Can not alloc domain group for node %d\n",
+ +                    num);
+ +             return -ENOMEM;
+ +     }
+ +     d->sched_group_nodes[num] = sg;
+ +
+ +     for_each_cpu(j, d->nodemask) {
+ +             sd = &per_cpu(node_domains, j).sd;
+ +             sd->groups = sg;
+ +     }
+ +
+ +     sg->__cpu_power = 0;
+ +     cpumask_copy(sched_group_cpus(sg), d->nodemask);
+ +     sg->next = sg;
+ +     cpumask_or(d->covered, d->covered, d->nodemask);
+ +
+ +     prev = sg;
+ +     for (j = 0; j < nr_node_ids; j++) {
+ +             n = (num + j) % nr_node_ids;
+ +             cpumask_complement(d->notcovered, d->covered);
+ +             cpumask_and(d->tmpmask, d->notcovered, cpu_map);
+ +             cpumask_and(d->tmpmask, d->tmpmask, d->domainspan);
+ +             if (cpumask_empty(d->tmpmask))
+ +                     break;
+ +             cpumask_and(d->tmpmask, d->tmpmask, cpumask_of_node(n));
+ +             if (cpumask_empty(d->tmpmask))
+ +                     continue;
+ +             sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
+ +                               GFP_KERNEL, num);
+ +             if (!sg) {
+ +                     printk(KERN_WARNING
+ +                            "Can not alloc domain group for node %d\n", j);
+ +                     return -ENOMEM;
+ +             }
+ +             sg->__cpu_power = 0;
+ +             cpumask_copy(sched_group_cpus(sg), d->tmpmask);
+ +             sg->next = prev->next;
+ +             cpumask_or(d->covered, d->covered, d->tmpmask);
+ +             prev->next = sg;
+ +             prev = sg;
+ +     }
+ +out:
+ +     return 0;
+ +}
    #endif /* CONFIG_NUMA */
    
    #ifdef CONFIG_NUMA
@@@@ -8478,280 -8476,285 -8378,280 +8576,285 @@@@ static void set_domain_attribute(struc
         }
    }
    
- -/*
- - * Build sched domains for a given set of cpus and attach the sched domains
- - * to the individual cpus
- - */
- -static int __build_sched_domains(const struct cpumask *cpu_map,
- -                              struct sched_domain_attr *attr)
- -{
- -     int i, err = -ENOMEM;
- -     struct root_domain *rd;
- -     cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered,
- -             tmpmask;
+ +static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
+ +                              const struct cpumask *cpu_map)
+ +{
+ +     switch (what) {
+ +     case sa_sched_groups:
+ +             free_sched_groups(cpu_map, d->tmpmask); /* fall through */
+ +             d->sched_group_nodes = NULL;
+ +     case sa_rootdomain:
+ +             free_rootdomain(d->rd); /* fall through */
+ +     case sa_tmpmask:
+ +             free_cpumask_var(d->tmpmask); /* fall through */
+ +     case sa_send_covered:
+ +             free_cpumask_var(d->send_covered); /* fall through */
+ +     case sa_this_core_map:
+ +             free_cpumask_var(d->this_core_map); /* fall through */
+ +     case sa_this_sibling_map:
+ +             free_cpumask_var(d->this_sibling_map); /* fall through */
+ +     case sa_nodemask:
+ +             free_cpumask_var(d->nodemask); /* fall through */
+ +     case sa_sched_group_nodes:
    #ifdef CONFIG_NUMA
- -     cpumask_var_t domainspan, covered, notcovered;
- -     struct sched_group **sched_group_nodes = NULL;
- -     int sd_allnodes = 0;
- -
- -     if (!alloc_cpumask_var(&domainspan, GFP_KERNEL))
- -             goto out;
- -     if (!alloc_cpumask_var(&covered, GFP_KERNEL))
- -             goto free_domainspan;
- -     if (!alloc_cpumask_var(&notcovered, GFP_KERNEL))
- -             goto free_covered;
-  -#endif
-  -
-  -     if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
-  -             goto free_notcovered;
-  -     if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
-  -             goto free_nodemask;
-  -     if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
-  -             goto free_this_sibling_map;
-  -     if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
-  -             goto free_this_core_map;
-  -     if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
-  -             goto free_send_covered;
+ +             kfree(d->sched_group_nodes); /* fall through */
+ +     case sa_notcovered:
+ +             free_cpumask_var(d->notcovered); /* fall through */
+ +     case sa_covered:
+ +             free_cpumask_var(d->covered); /* fall through */
+ +     case sa_domainspan:
+ +             free_cpumask_var(d->domainspan); /* fall through */
+  +#endif
+ +     case sa_none:
+ +             break;
+ +     }
+ +}
    
-       if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
-               goto free_notcovered;
-       if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
-               goto free_nodemask;
-       if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
-               goto free_this_sibling_map;
-       if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
-               goto free_this_core_map;
-       if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
-               goto free_send_covered;
-  
+ +static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
+ +                                                const struct cpumask *cpu_map)
+ +{
    #ifdef CONFIG_NUMA
- -     /*
- -      * Allocate the per-node list of sched groups
- -      */
- -     sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *),
- -                                 GFP_KERNEL);
- -     if (!sched_group_nodes) {
+ +     if (!alloc_cpumask_var(&d->domainspan, GFP_KERNEL))
+ +             return sa_none;
+ +     if (!alloc_cpumask_var(&d->covered, GFP_KERNEL))
+ +             return sa_domainspan;
+ +     if (!alloc_cpumask_var(&d->notcovered, GFP_KERNEL))
+ +             return sa_covered;
+ +     /* Allocate the per-node list of sched groups */
+ +     d->sched_group_nodes = kcalloc(nr_node_ids,
+ +                                   sizeof(struct sched_group *), GFP_KERNEL);
+ +     if (!d->sched_group_nodes) {
                 printk(KERN_WARNING "Can not alloc sched group node list\n");
- -             goto free_tmpmask;
-  -     }
-  -#endif
-  -
-  -     rd = alloc_rootdomain();
-  -     if (!rd) {
+ +             return sa_notcovered;
+  +     }
+ +     sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes;
+  +#endif
-  
-       rd = alloc_rootdomain();
-       if (!rd) {
+ +     if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))
+ +             return sa_sched_group_nodes;
+ +     if (!alloc_cpumask_var(&d->this_sibling_map, GFP_KERNEL))
+ +             return sa_nodemask;
+ +     if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
+ +             return sa_this_sibling_map;
+ +     if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
+ +             return sa_this_core_map;
+ +     if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
+ +             return sa_send_covered;
+ +     d->rd = alloc_rootdomain();
+ +     if (!d->rd) {
                 printk(KERN_WARNING "Cannot alloc root domain\n");
- -             goto free_sched_groups;
+ +             return sa_tmpmask;
         }
+ +     return sa_rootdomain;
+ +}
    
+ +static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
+ +     const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i)
+ +{
+ +     struct sched_domain *sd = NULL;
    #ifdef CONFIG_NUMA
- -     sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes;
- -#endif
- -
- -     /*
- -      * Set up domains for cpus specified by the cpu_map.
- -      */
- -     for_each_cpu(i, cpu_map) {
- -             struct sched_domain *sd = NULL, *p;
- -
- -             cpumask_and(nodemask, cpumask_of_node(cpu_to_node(i)), cpu_map);
- -
- -#ifdef CONFIG_NUMA
- -             if (cpumask_weight(cpu_map) >
- -                             SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
- -                     sd = &per_cpu(allnodes_domains, i).sd;
- -                     SD_INIT(sd, ALLNODES);
- -                     set_domain_attribute(sd, attr);
- -                     cpumask_copy(sched_domain_span(sd), cpu_map);
- -                     cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
- -                     p = sd;
- -                     sd_allnodes = 1;
- -             } else
- -                     p = NULL;
+ +     struct sched_domain *parent;
    
- -             sd = &per_cpu(node_domains, i).sd;
- -             SD_INIT(sd, NODE);
+ +     d->sd_allnodes = 0;
+ +     if (cpumask_weight(cpu_map) >
+ +         SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) {
+ +             sd = &per_cpu(allnodes_domains, i).sd;
+ +             SD_INIT(sd, ALLNODES);
                 set_domain_attribute(sd, attr);
- -             sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
- -             sd->parent = p;
- -             if (p)
- -                     p->child = sd;
- -             cpumask_and(sched_domain_span(sd),
- -                         sched_domain_span(sd), cpu_map);
+ +             cpumask_copy(sched_domain_span(sd), cpu_map);
+ +             cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask);
+ +             d->sd_allnodes = 1;
+ +     }
+ +     parent = sd;
+ +
+ +     sd = &per_cpu(node_domains, i).sd;
+ +     SD_INIT(sd, NODE);
+ +     set_domain_attribute(sd, attr);
+ +     sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
+ +     sd->parent = parent;
+ +     if (parent)
+ +             parent->child = sd;
+ +     cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);
    #endif
+ +     return sd;
+ +}
    
- -             p = sd;
- -             sd = &per_cpu(phys_domains, i).sd;
- -             SD_INIT(sd, CPU);
- -             set_domain_attribute(sd, attr);
- -             cpumask_copy(sched_domain_span(sd), nodemask);
- -             sd->parent = p;
- -             if (p)
- -                     p->child = sd;
- -             cpu_to_phys_group(i, cpu_map, &sd->groups, tmpmask);
+ +static struct sched_domain *__build_cpu_sched_domain(struct s_data *d,
+ +     const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+ +     struct sched_domain *parent, int i)
+ +{
+ +     struct sched_domain *sd;
+ +     sd = &per_cpu(phys_domains, i).sd;
+ +     SD_INIT(sd, CPU);
+ +     set_domain_attribute(sd, attr);
+ +     cpumask_copy(sched_domain_span(sd), d->nodemask);
+ +     sd->parent = parent;
+ +     if (parent)
+ +             parent->child = sd;
+ +     cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask);
+ +     return sd;
+ +}
    
+ +static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
+ +     const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+ +     struct sched_domain *parent, int i)
+ +{
+ +     struct sched_domain *sd = parent;
    #ifdef CONFIG_SCHED_MC
- -             p = sd;
- -             sd = &per_cpu(core_domains, i).sd;
- -             SD_INIT(sd, MC);
- -             set_domain_attribute(sd, attr);
- -             cpumask_and(sched_domain_span(sd), cpu_map,
- -                                                cpu_coregroup_mask(i));
- -             sd->parent = p;
- -             p->child = sd;
- -             cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
+ +     sd = &per_cpu(core_domains, i).sd;
+ +     SD_INIT(sd, MC);
+ +     set_domain_attribute(sd, attr);
+ +     cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i));
+ +     sd->parent = parent;
+ +     parent->child = sd;
+ +     cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask);
    #endif
+ +     return sd;
+ +}
    
+ +static struct sched_domain *__build_smt_sched_domain(struct s_data *d,
+ +     const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+ +     struct sched_domain *parent, int i)
+ +{
+ +     struct sched_domain *sd = parent;
    #ifdef CONFIG_SCHED_SMT
- -             p = sd;
- -             sd = &per_cpu(cpu_domains, i).sd;
- -             SD_INIT(sd, SIBLING);
- -             set_domain_attribute(sd, attr);
- -             cpumask_and(sched_domain_span(sd),
- -                         topology_thread_cpumask(i), cpu_map);
- -             sd->parent = p;
- -             p->child = sd;
- -             cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
+ +     sd = &per_cpu(cpu_domains, i).sd;
+ +     SD_INIT(sd, SIBLING);
+ +     set_domain_attribute(sd, attr);
+ +     cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i));
+ +     sd->parent = parent;
+ +     parent->child = sd;
+ +     cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask);
    #endif
- -     }
+ +     return sd;
+ +}
    
+ +static void build_sched_groups(struct s_data *d, enum sched_domain_level l,
+ +                            const struct cpumask *cpu_map, int cpu)
+ +{
+ +     switch (l) {
    #ifdef CONFIG_SCHED_SMT
- -     /* Set up CPU (sibling) groups */
- -     for_each_cpu(i, cpu_map) {
- -             cpumask_and(this_sibling_map,
- -                         topology_thread_cpumask(i), cpu_map);
- -             if (i != cpumask_first(this_sibling_map))
- -                     continue;
- -
- -             init_sched_build_groups(this_sibling_map, cpu_map,
- -                                     &cpu_to_cpu_group,
- -                                     send_covered, tmpmask);
- -     }
+ +     case SD_LV_SIBLING: /* set up CPU (sibling) groups */
+ +             cpumask_and(d->this_sibling_map, cpu_map,
+ +                         topology_thread_cpumask(cpu));
+ +             if (cpu == cpumask_first(d->this_sibling_map))
+ +                     init_sched_build_groups(d->this_sibling_map, cpu_map,
+ +                                             &cpu_to_cpu_group,
+ +                                             d->send_covered, d->tmpmask);
+ +             break;
    #endif
- -
    #ifdef CONFIG_SCHED_MC
- -     /* Set up multi-core groups */
- -     for_each_cpu(i, cpu_map) {
- -             cpumask_and(this_core_map, cpu_coregroup_mask(i), cpu_map);
- -             if (i != cpumask_first(this_core_map))
- -                     continue;
- -
- -             init_sched_build_groups(this_core_map, cpu_map,
- -                                     &cpu_to_core_group,
- -                                     send_covered, tmpmask);
- -     }
+ +     case SD_LV_MC: /* set up multi-core groups */
+ +             cpumask_and(d->this_core_map, cpu_map, cpu_coregroup_mask(cpu));
+ +             if (cpu == cpumask_first(d->this_core_map))
+ +                     init_sched_build_groups(d->this_core_map, cpu_map,
+ +                                             &cpu_to_core_group,
+ +                                             d->send_covered, d->tmpmask);
+ +             break;
    #endif
- -
- -     /* Set up physical groups */
- -     for (i = 0; i < nr_node_ids; i++) {
- -             cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
- -             if (cpumask_empty(nodemask))
- -                     continue;
- -
- -             init_sched_build_groups(nodemask, cpu_map,
- -                                     &cpu_to_phys_group,
- -                                     send_covered, tmpmask);
- -     }
- -
+ +     case SD_LV_CPU: /* set up physical groups */
+ +             cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
+ +             if (!cpumask_empty(d->nodemask))
+ +                     init_sched_build_groups(d->nodemask, cpu_map,
+ +                                             &cpu_to_phys_group,
+ +                                             d->send_covered, d->tmpmask);
+ +             break;
    #ifdef CONFIG_NUMA
- -     /* Set up node groups */
- -     if (sd_allnodes) {
- -             init_sched_build_groups(cpu_map, cpu_map,
- -                                     &cpu_to_allnodes_group,
- -                                     send_covered, tmpmask);
+ +     case SD_LV_ALLNODES:
+ +             init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group,
+ +                                     d->send_covered, d->tmpmask);
+ +             break;
+ +#endif
+ +     default:
+ +             break;
         }
+ +}
    
- -     for (i = 0; i < nr_node_ids; i++) {
- -             /* Set up node groups */
- -             struct sched_group *sg, *prev;
- -             int j;
- -
- -             cpumask_clear(covered);
- -             cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
- -             if (cpumask_empty(nodemask)) {
- -                     sched_group_nodes[i] = NULL;
- -                     continue;
- -             }
+ +/*
+ + * Build sched domains for a given set of cpus and attach the sched domains
+ + * to the individual cpus
+ + */
+ +static int __build_sched_domains(const struct cpumask *cpu_map,
+ +                              struct sched_domain_attr *attr)
+ +{
+ +     enum s_alloc alloc_state = sa_none;
+ +     struct s_data d;
+ +     struct sched_domain *sd;
+ +     int i;
+ +#ifdef CONFIG_NUMA
+ +     d.sd_allnodes = 0;
+ +#endif
    
- -             sched_domain_node_span(i, domainspan);
- -             cpumask_and(domainspan, domainspan, cpu_map);
+ +     alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
+ +     if (alloc_state != sa_rootdomain)
+ +             goto error;
+ +     alloc_state = sa_sched_groups;
    
- -             sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
- -                               GFP_KERNEL, i);
- -             if (!sg) {
- -                     printk(KERN_WARNING "Can not alloc domain group for "
- -                             "node %d\n", i);
- -                     goto error;
- -             }
- -             sched_group_nodes[i] = sg;
- -             for_each_cpu(j, nodemask) {
- -                     struct sched_domain *sd;
+ +     /*
+ +      * Set up domains for cpus specified by the cpu_map.
+ +      */
+ +     for_each_cpu(i, cpu_map) {
+ +             cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)),
+ +                         cpu_map);
    
- -                     sd = &per_cpu(node_domains, j).sd;
- -                     sd->groups = sg;
- -             }
- -             sg->__cpu_power = 0;
- -             cpumask_copy(sched_group_cpus(sg), nodemask);
- -             sg->next = sg;
- -             cpumask_or(covered, covered, nodemask);
- -             prev = sg;
+ +             sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
+ +             sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
+ +             sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
+ +             sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
+ +     }
    
- -             for (j = 0; j < nr_node_ids; j++) {
- -                     int n = (i + j) % nr_node_ids;
+ +     for_each_cpu(i, cpu_map) {
+ +             build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
+ +             build_sched_groups(&d, SD_LV_MC, cpu_map, i);
+ +     }
    
- -                     cpumask_complement(notcovered, covered);
- -                     cpumask_and(tmpmask, notcovered, cpu_map);
- -                     cpumask_and(tmpmask, tmpmask, domainspan);
- -                     if (cpumask_empty(tmpmask))
- -                             break;
+ +     /* Set up physical groups */
+ +     for (i = 0; i < nr_node_ids; i++)
+ +             build_sched_groups(&d, SD_LV_CPU, cpu_map, i);
    
- -                     cpumask_and(tmpmask, tmpmask, cpumask_of_node(n));
- -                     if (cpumask_empty(tmpmask))
- -                             continue;
+ +#ifdef CONFIG_NUMA
+ +     /* Set up node groups */
+ +     if (d.sd_allnodes)
+ +             build_sched_groups(&d, SD_LV_ALLNODES, cpu_map, 0);
    
- -                     sg = kmalloc_node(sizeof(struct sched_group) +
- -                                       cpumask_size(),
- -                                       GFP_KERNEL, i);
- -                     if (!sg) {
- -                             printk(KERN_WARNING
- -                             "Can not alloc domain group for node %d\n", j);
- -                             goto error;
- -                     }
- -                     sg->__cpu_power = 0;
- -                     cpumask_copy(sched_group_cpus(sg), tmpmask);
- -                     sg->next = prev->next;
- -                     cpumask_or(covered, covered, tmpmask);
- -                     prev->next = sg;
- -                     prev = sg;
- -             }
- -     }
+ +     for (i = 0; i < nr_node_ids; i++)
+ +             if (build_numa_sched_groups(&d, cpu_map, i))
+ +                     goto error;
    #endif
    
         /* Calculate CPU power for physical packages and nodes */
    #ifdef CONFIG_SCHED_SMT
         for_each_cpu(i, cpu_map) {
- -             struct sched_domain *sd = &per_cpu(cpu_domains, i).sd;
- -
+ +             sd = &per_cpu(cpu_domains, i).sd;
                 init_sched_groups_power(i, sd);
         }
    #endif
    #ifdef CONFIG_SCHED_MC
         for_each_cpu(i, cpu_map) {
- -             struct sched_domain *sd = &per_cpu(core_domains, i).sd;
- -
+ +             sd = &per_cpu(core_domains, i).sd;
                 init_sched_groups_power(i, sd);
         }
    #endif
    
         for_each_cpu(i, cpu_map) {
- -             struct sched_domain *sd = &per_cpu(phys_domains, i).sd;
- -
+ +             sd = &per_cpu(phys_domains, i).sd;
                 init_sched_groups_power(i, sd);
         }
    
    #ifdef CONFIG_NUMA
         for (i = 0; i < nr_node_ids; i++)
- -             init_numa_sched_groups_power(sched_group_nodes[i]);
+ +             init_numa_sched_groups_power(d.sched_group_nodes[i]);
    
- -     if (sd_allnodes) {
+ +     if (d.sd_allnodes) {
                 struct sched_group *sg;
    
                 cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
- -                                                             tmpmask);
+ +                                                             d.tmpmask);
                 init_numa_sched_groups_power(sg);
         }
    #endif
    
         /* Attach the domains */
         for_each_cpu(i, cpu_map) {
- -             struct sched_domain *sd;
    #ifdef CONFIG_SCHED_SMT
                 sd = &per_cpu(cpu_domains, i).sd;
    #elif defined(CONFIG_SCHED_MC)
@@@@ -8759,44 -8762,16 -8659,44 +8862,16 @@@@
    #else
                 sd = &per_cpu(phys_domains, i).sd;
    #endif
- -             cpu_attach_domain(sd, rd, i);
-       }
-  
-       err = 0;
-  
-  free_tmpmask:
-       free_cpumask_var(tmpmask);
-  free_send_covered:
-       free_cpumask_var(send_covered);
-  free_this_core_map:
-       free_cpumask_var(this_core_map);
-  free_this_sibling_map:
-       free_cpumask_var(this_sibling_map);
-  free_nodemask:
-       free_cpumask_var(nodemask);
-  free_notcovered:
-  #ifdef CONFIG_NUMA
-       free_cpumask_var(notcovered);
-  free_covered:
-       free_cpumask_var(covered);
-  free_domainspan:
-       free_cpumask_var(domainspan);
-  out:
-  #endif
-       return err;
+ +             cpu_attach_domain(sd, d.rd, i);
+       }
    
-  -     err = 0;
-  -
-  -free_tmpmask:
-  -     free_cpumask_var(tmpmask);
-  -free_send_covered:
-  -     free_cpumask_var(send_covered);
-  -free_this_core_map:
-  -     free_cpumask_var(this_core_map);
-  -free_this_sibling_map:
-  -     free_cpumask_var(this_sibling_map);
-  -free_nodemask:
-  -     free_cpumask_var(nodemask);
-  -free_notcovered:
-  -#ifdef CONFIG_NUMA
-  -     free_cpumask_var(notcovered);
-  -free_covered:
-  -     free_cpumask_var(covered);
-  -free_domainspan:
-  -     free_cpumask_var(domainspan);
-  -out:
-  -#endif
-  -     return err;
-  -
- -free_sched_groups:
- -#ifdef CONFIG_NUMA
- -     kfree(sched_group_nodes);
- -#endif
- -     goto free_tmpmask;
+ +     d.sched_group_nodes = NULL; /* don't free this we still need it */
+ +     __free_domain_allocs(&d, sa_tmpmask, cpu_map);
+ +     return 0;
    
- -#ifdef CONFIG_NUMA
    error:
- -     free_sched_groups(cpu_map, tmpmask);
- -     free_rootdomain(rd);
- -     goto free_tmpmask;
- -#endif
+ +     __free_domain_allocs(&d, alloc_state, cpu_map);
+ +     return -ENOMEM;
    }
    
    static int build_sched_domains(const struct cpumask *cpu_map)
@@@@ -9404,11 -9379,11 -9304,11 +9479,11 @@@@ void __init sched_init(void
                  * system cpu resource, based on the weight assigned to root
                  * user's cpu share (INIT_TASK_GROUP_LOAD). This is accomplished
                  * by letting tasks of init_task_group sit in a separate cfs_rq
- --              * (init_cfs_rq) and having one entity represent this group of
+ ++              * (init_tg_cfs_rq) and having one entity represent this group of
                  * tasks in rq->cfs (i.e init_task_group->se[] != NULL).
                  */
                 init_tg_cfs_entry(&init_task_group,
- --                             &per_cpu(init_cfs_rq, i),
+ ++                             &per_cpu(init_tg_cfs_rq, i),
                                 &per_cpu(init_sched_entity, i), i, 1,
                                 root_task_group.se[i]);
    
@@@@ -9434,7 -9409,6 -9334,6 +9509,7 @@@@
    #ifdef CONFIG_SMP
                 rq->sd = NULL;
                 rq->rd = NULL;
+ ++             rq->post_schedule = 0;
                 rq->active_balance = 0;
                 rq->next_balance = jiffies;
                 rq->push_cpu = 0;
@@@@ -9499,20 -9473,13 -9398,13 +9574,20 @@@@
    }
    
    #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
- --void __might_sleep(char *file, int line)
+ ++static inline int preempt_count_equals(int preempt_offset)
+ ++{
+ ++     int nested = preempt_count() & ~PREEMPT_ACTIVE;
+ ++
+ ++     return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
+ ++}
+ ++
+ ++void __might_sleep(char *file, int line, int preempt_offset)
    {
    #ifdef in_atomic
         static unsigned long prev_jiffy;        /* ratelimiting */
    
- --     if ((!in_atomic() && !irqs_disabled()) ||
- --                 system_state != SYSTEM_RUNNING || oops_in_progress)
+ ++     if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
+ ++         system_state != SYSTEM_RUNNING || oops_in_progress)
                 return;
         if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                 return;
author	Ingo Molnar <mingo@elte.hu>
	Fri, 4 Sep 2009 08:08:43 +0000 (10:08 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Fri, 4 Sep 2009 08:08:47 +0000 (10:08 +0200)