Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 16 Mar 2011 01:37:30 +0000 (18:37 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 16 Mar 2011 01:37:30 +0000 (18:37 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 16 Mar 2011 01:37:30 +0000 (18:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 16 Mar 2011 01:37:30 +0000 (18:37 -0700)
diff --combined include/linux/interrupt.h

index d746da1,a1382b9..2eb16e0
--- 1/include/linux/interrupt.h
--- 2/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@@ -55,7 -55,7 +55,7 @@@
    *                Used by threaded interrupts which need to keep the
    *                irq line disabled until the threaded handler has been run.
    * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
- - *
+ + * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
    */
   #define IRQF_DISABLED         0x00000020
   #define IRQF_SAMPLE_RANDOM    0x00000040
@@@ -67,7 -67,6 +67,7 @@@
   #define IRQF_IRQPOLL          0x00001000
   #define IRQF_ONESHOT          0x00002000
   #define IRQF_NO_SUSPEND               0x00004000
+ +#define IRQF_FORCE_RESUME     0x00008000
   
   #define IRQF_TIMER            (__IRQF_TIMER | IRQF_NO_SUSPEND)
   
@@@ -427,6 -426,13 +427,13 @@@ extern void raise_softirq(unsigned int 
    */
   DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
   
+ DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
+ 
+ /*
+  * Fetch the task_struct pointer held in this CPU's instance of the
+  * per-cpu 'ksoftirqd' variable.
+  */
+ static inline struct task_struct *this_cpu_ksoftirqd(void)
+ {
+       struct task_struct *tsk = this_cpu_read(ksoftirqd);
+ 
+       return tsk;
+ }
+ 
   /* Try to send a softirq to a remote cpu.  If this cannot be done, the
    * work will be queued to the local cpu.
    */
diff --combined include/linux/sched.h

index c57e527,755c476..214af2e
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -1058,6 -1058,7 +1058,7 @@@ struct sched_class 
         void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
         void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
         void (*yield_task) (struct rq *rq);
+       bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
   
         void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
   
@@@ -1084,12 -1085,10 +1085,10 @@@
         void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
         void (*task_fork) (struct task_struct *p);
   
-       void (*switched_from) (struct rq *this_rq, struct task_struct *task,
-                              int running);
-       void (*switched_to) (struct rq *this_rq, struct task_struct *task,
-                            int running);
+       void (*switched_from) (struct rq *this_rq, struct task_struct *task);
+       void (*switched_to) (struct rq *this_rq, struct task_struct *task);
         void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
-                            int oldprio, int running);
+                            int oldprio);
   
         unsigned int (*get_rr_interval) (struct rq *rq,
                                          struct task_struct *task);
@@@ -1715,7 -1714,6 +1714,6 @@@ extern void thread_group_times(struct t
   /*
    * Per process flags
    */
- #define PF_KSOFTIRQD  0x00000001      /* I am ksoftirqd */
   #define PF_STARTING   0x00000002      /* being created */
   #define PF_EXITING    0x00000004      /* getting shut down */
   #define PF_EXITPIDONE 0x00000008      /* pi exit done on shut down */
@@@ -1945,8 -1943,6 +1943,6 @@@ int sched_rt_handler(struct ctl_table *
                 void __user *buffer, size_t *lenp,
                 loff_t *ppos);
   
- extern unsigned int sysctl_sched_compat_yield;
- 
   #ifdef CONFIG_SCHED_AUTOGROUP
   extern unsigned int sysctl_sched_autogroup_enabled;
   
@@@ -1977,6 -1973,7 +1973,7 @@@ static inline int rt_mutex_getprio(stru
   # define rt_mutex_adjust_pi(p)                do { } while (0)
   #endif
   
+ extern bool yield_to(struct task_struct *p, bool preempt);
   extern void set_user_nice(struct task_struct *p, long nice);
   extern int task_prio(const struct task_struct *p);
   extern int task_nice(const struct task_struct *p);
@@@ -2578,6 -2575,13 +2575,6 @@@ static inline void inc_syscw(struct tas
   #define TASK_SIZE_OF(tsk)     TASK_SIZE
   #endif
   
- -/*
- - * Call the function if the target task is executing on a CPU right now:
- - */
- -extern void task_oncpu_function_call(struct task_struct *p,
- -                                   void (*func) (void *info), void *info);
- -
- -
   #ifdef CONFIG_MM_OWNER
   extern void mm_update_next_owner(struct mm_struct *mm);
   extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
diff --combined kernel/sched.c

index 57a18e8,61452e8..27125e4
--- 1/kernel/sched.c
--- 2/kernel/sched.c
+++ b/kernel/sched.c
@@@ -324,7 -324,7 +324,7 @@@ struct cfs_rq 
          * 'curr' points to currently running entity on this cfs_rq.
          * It is set to NULL otherwise (i.e when none are currently running).
          */
-       struct sched_entity *curr, *next, *last;
+       struct sched_entity *curr, *next, *last, *skip;
   
         unsigned int nr_spread_over;
   
@@@ -606,6 -606,9 +606,6 @@@ static inline struct task_group *task_g
         struct task_group *tg;
         struct cgroup_subsys_state *css;
   
- -      if (p->flags & PF_EXITING)
- -              return &root_task_group;
- -
         css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
                         lockdep_is_held(&task_rq(p)->lock));
         tg = container_of(css, struct task_group, css);
@@@ -1683,6 -1686,39 +1683,39 @@@ static void double_rq_unlock(struct rq 
                 __release(rq2->lock);
   }
   
+ #else /* CONFIG_SMP */
+ 
+ /*
+  * double_rq_lock - safely lock two runqueues
+  *
+  * Note this does not disable interrupts like task_rq_lock,
+  * you need to do so manually before calling.
+  *
+  * This is the variant compiled in the #else branch of CONFIG_SMP.
+  * Both arguments must refer to the same runqueue (enforced with
+  * BUG_ON), so only rq1->lock is really taken; rq2->lock is merely
+  * passed to __acquire() to match the __acquires() annotations.
+  * Calling this with interrupts enabled also triggers a BUG_ON.
+  */
+ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
+       __acquires(rq1->lock)
+       __acquires(rq2->lock)
+ {
+       BUG_ON(!irqs_disabled());
+       BUG_ON(rq1 != rq2);
+       raw_spin_lock(&rq1->lock);
+       __acquire(rq2->lock);   /* Fake it out ;) - rq2 == rq1, no second lock */
+ }
+ 
+ /*
+  * double_rq_unlock - safely unlock two runqueues
+  *
+  * Note this does not restore interrupts like task_rq_unlock,
+  * you need to do so manually after calling.
+  *
+  * This is the variant compiled in the #else branch of CONFIG_SMP.
+  * Both arguments must refer to the same runqueue (enforced with
+  * BUG_ON), so only rq1->lock is really released; rq2->lock is merely
+  * passed to __release() to match the __releases() annotations.
+  */
+ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+       __releases(rq1->lock)
+       __releases(rq2->lock)
+ {
+       BUG_ON(rq1 != rq2);
+       raw_spin_unlock(&rq1->lock);
+       __release(rq2->lock);
+ }
+ 
   #endif
   
   static void calc_load_account_idle(struct rq *this_rq);
@@@ -1877,7 -1913,7 +1910,7 @@@ void account_system_vtime(struct task_s
          */
         if (hardirq_count())
                 __this_cpu_add(cpu_hardirq_time, delta);
-       else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
+       else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
                 __this_cpu_add(cpu_softirq_time, delta);
   
         irq_time_write_end();
@@@ -1917,8 -1953,40 +1950,40 @@@ static void update_rq_clock_task(struc
                 sched_rt_avg_update(rq, irq_delta);
   }
   
+ /*
+  * Report whether this CPU's cpu_hardirq_time, once converted to
+  * cputime64, is greater than what cpustat->irq already holds.
+  *
+  * The read and comparison are done with local interrupts disabled.
+  * Returns 1 if it is greater, 0 otherwise.
+  */
+ static int irqtime_account_hi_update(void)
+ {
+       struct cpu_usage_stat *stat = &kstat_this_cpu.cpustat;
+       unsigned long irqflags;
+       u64 hardirq_ns;
+       int pending;
+ 
+       local_irq_save(irqflags);
+       hardirq_ns = this_cpu_read(cpu_hardirq_time);
+       pending = cputime64_gt(nsecs_to_cputime64(hardirq_ns),
+                              stat->irq) ? 1 : 0;
+       local_irq_restore(irqflags);
+ 
+       return pending;
+ }
+ 
+ /*
+  * Report whether this CPU's cpu_softirq_time, once converted to
+  * cputime64, is greater than what cpustat->softirq already holds.
+  *
+  * The read and comparison are done with local interrupts disabled.
+  * Returns 1 if it is greater, 0 otherwise.
+  */
+ static int irqtime_account_si_update(void)
+ {
+       struct cpu_usage_stat *stat = &kstat_this_cpu.cpustat;
+       unsigned long irqflags;
+       u64 softirq_ns;
+       int pending;
+ 
+       local_irq_save(irqflags);
+       softirq_ns = this_cpu_read(cpu_softirq_time);
+       pending = cputime64_gt(nsecs_to_cputime64(softirq_ns),
+                              stat->softirq) ? 1 : 0;
+       local_irq_restore(irqflags);
+ 
+       return pending;
+ }
+ 
   #else /* CONFIG_IRQ_TIME_ACCOUNTING */
   
+ #define sched_clock_irqtime   (0)
+ 
   static void update_rq_clock_task(struct rq *rq, s64 delta)
   {
         rq->clock_task += delta;
@@@ -2022,14 -2090,14 +2087,14 @@@ inline int task_curr(const struct task_
   
   static inline void check_class_changed(struct rq *rq, struct task_struct *p,
                                        const struct sched_class *prev_class,
-                                      int oldprio, int running)
+                                      int oldprio)
   {
         if (prev_class != p->sched_class) {
                 if (prev_class->switched_from)
-                       prev_class->switched_from(rq, p, running);
-               p->sched_class->switched_to(rq, p, running);
-       } else
-               p->sched_class->prio_changed(rq, p, oldprio, running);
+                       prev_class->switched_from(rq, p);
+               p->sched_class->switched_to(rq, p);
+       } else if (oldprio != p->prio)
+               p->sched_class->prio_changed(rq, p, oldprio);
   }
   
   static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
@@@ -2262,6 -2330,27 +2327,6 @@@ void kick_process(struct task_struct *p
   EXPORT_SYMBOL_GPL(kick_process);
   #endif /* CONFIG_SMP */
   
- -/**
- - * task_oncpu_function_call - call a function on the cpu on which a task runs
- - * @p:                the task to evaluate
- - * @func:     the function to be called
- - * @info:     the function call argument
- - *
- - * Calls the function @func when the task is currently running. This might
- - * be on the current CPU, which just calls the function directly
- - */
- -void task_oncpu_function_call(struct task_struct *p,
- -                            void (*func) (void *info), void *info)
- -{
- -      int cpu;
- -
- -      preempt_disable();
- -      cpu = task_cpu(p);
- -      if (task_curr(p))
- -              smp_call_function_single(cpu, func, info, 1);
- -      preempt_enable();
- -}
- -
   #ifdef CONFIG_SMP
   /*
    * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
@@@ -2542,6 -2631,7 +2607,7 @@@ static void __sched_fork(struct task_st
         p->se.sum_exec_runtime          = 0;
         p->se.prev_sum_exec_runtime     = 0;
         p->se.nr_migrations             = 0;
+       p->se.vruntime                  = 0;
   
   #ifdef CONFIG_SCHEDSTATS
         memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@@ -2752,12 -2842,9 +2818,12 @@@ static inline voi
   prepare_task_switch(struct rq *rq, struct task_struct *prev,
                     struct task_struct *next)
   {
+ +      sched_info_switch(prev, next);
+ +      perf_event_task_sched_out(prev, next);
         fire_sched_out_preempt_notifiers(prev, next);
         prepare_lock_switch(rq, next);
         prepare_arch_switch(next);
+ +      trace_sched_switch(prev, next);
   }
   
   /**
@@@ -2890,7 -2977,7 +2956,7 @@@ context_switch(struct rq *rq, struct ta
         struct mm_struct *mm, *oldmm;
   
         prepare_task_switch(rq, prev, next);
- -      trace_sched_switch(prev, next);
+ +
         mm = next->mm;
         oldmm = prev->active_mm;
         /*
@@@ -3547,6 -3634,32 +3613,32 @@@ static void account_guest_time(struct t
   }
   
   /*
+  * Account system cpu time to a process and desired cpustat field
+  * @p: the process that the cpu time gets accounted to
+  * @cputime: the cpu time spent in kernel space since the last update
+  * @cputime_scaled: cputime scaled by cpu frequency
+  * @target_cputime64: pointer to cpustat field that has to be updated
+  */
+ static inline
+ void __account_system_time(struct task_struct *p, cputime_t cputime,
+                       cputime_t cputime_scaled, cputime64_t *target_cputime64)
+ {
+       cputime64_t tmp = cputime_to_cputime64(cputime);
+ 
+       /* Add system time to process. */
+       p->stime = cputime_add(p->stime, cputime);
+       p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
+       account_group_system_time(p, cputime);
+ 
+       /* Add system time to cpustat. */
+       *target_cputime64 = cputime64_add(*target_cputime64, tmp);
+       cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+ 
+       /* Account for system time used */
+       acct_update_integrals(p);
+ }
+ 
+ /*
    * Account system cpu time to a process.
    * @p: the process that the cpu time gets accounted to
    * @hardirq_offset: the offset to subtract from hardirq_count()
@@@ -3557,36 -3670,26 +3649,26 @@@ void account_system_time(struct task_st
                          cputime_t cputime, cputime_t cputime_scaled)
   {
         struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
-       cputime64_t tmp;
+       cputime64_t *target_cputime64;
   
         if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                 account_guest_time(p, cputime, cputime_scaled);
                 return;
         }
   
-       /* Add system time to process. */
-       p->stime = cputime_add(p->stime, cputime);
-       p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
-       account_group_system_time(p, cputime);
- 
-       /* Add system time to cpustat. */
-       tmp = cputime_to_cputime64(cputime);
         if (hardirq_count() - hardirq_offset)
-               cpustat->irq = cputime64_add(cpustat->irq, tmp);
+               target_cputime64 = &cpustat->irq;
         else if (in_serving_softirq())
-               cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+               target_cputime64 = &cpustat->softirq;
         else
-               cpustat->system = cputime64_add(cpustat->system, tmp);
- 
-       cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+               target_cputime64 = &cpustat->system;
   
-       /* Account for system time used */
-       acct_update_integrals(p);
+       __account_system_time(p, cputime, cputime_scaled, target_cputime64);
   }
   
   /*
    * Account for involuntary wait time.
-  * @steal: the cpu time spent in involuntary wait
+  * @cputime: the cpu time spent in involuntary wait
    */
   void account_steal_time(cputime_t cputime)
   {
@@@ -3614,6 -3717,73 +3696,73 @@@ void account_idle_time(cputime_t cputim
   
   #ifndef CONFIG_VIRT_CPU_ACCOUNTING
   
+ #ifdef CONFIG_IRQ_TIME_ACCOUNTING
+ /*
+  * Charge one tick to exactly one accounting bucket
+  * @p: the process that the cpu time gets accounted to
+  * @user_tick: is the tick from userspace
+  * @rq: the pointer to rq
+  *
+  * The first matching case wins, tested in this order:
+  * - pending hardirq update
+  * - pending softirq update
+  * - @p is this CPU's ksoftirqd
+  * - user_time
+  * - idle_time
+  * - system time
+  *   - check for guest_time
+  *   - else account as system_time
+  *
+  * Check for hardirq is done both for system and user time as there is
+  * no timer going off while we are on hardirq and hence we may never get an
+  * opportunity to update it solely in system time.
+  * p->stime and friends are only updated on system time and not on irq or
+  * softirq, as those do not count in task exec_runtime any more.
+  */
+ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+                                               struct rq *rq)
+ {
+       struct cpu_usage_stat *stat = &kstat_this_cpu.cpustat;
+       cputime_t scaled_jiffy = cputime_to_scaled(cputime_one_jiffy);
+       cputime64_t jiffy64 = cputime_to_cputime64(cputime_one_jiffy);
+ 
+       if (irqtime_account_hi_update()) {
+               stat->irq = cputime64_add(stat->irq, jiffy64);
+               return;
+       }
+ 
+       if (irqtime_account_si_update()) {
+               stat->softirq = cputime64_add(stat->softirq, jiffy64);
+               return;
+       }
+ 
+       if (p == this_cpu_ksoftirqd()) {
+               /*
+                * ksoftirqd time does not get accounted in cpu_softirq_time,
+                * so it is handled separately here; p->stime also needs to
+                * be updated for ksoftirqd.
+                */
+               __account_system_time(p, cputime_one_jiffy, scaled_jiffy,
+                                       &stat->softirq);
+               return;
+       }
+ 
+       if (user_tick) {
+               account_user_time(p, cputime_one_jiffy, scaled_jiffy);
+               return;
+       }
+ 
+       if (p == rq->idle) {
+               account_idle_time(cputime_one_jiffy);
+               return;
+       }
+ 
+       /* Neither user nor idle: guest time or system time. */
+       if (p->flags & PF_VCPU)
+               account_guest_time(p, cputime_one_jiffy, scaled_jiffy);
+       else
+               __account_system_time(p, cputime_one_jiffy, scaled_jiffy,
+                                       &stat->system);
+ }
+ 
+ /*
+  * Account @ticks ticks, one at a time, through
+  * irqtime_account_process_tick() on behalf of 'current', with
+  * user_tick == 0 and this CPU's runqueue.
+  */
+ static void irqtime_account_idle_ticks(int ticks)
+ {
+       struct rq *rq = this_rq();
+       int remaining;
+ 
+       for (remaining = ticks; remaining > 0; remaining--)
+               irqtime_account_process_tick(current, 0, rq);
+ }
+ #else /* CONFIG_IRQ_TIME_ACCOUNTING */
+ static void irqtime_account_idle_ticks(int ticks) {}
+ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+                                               struct rq *rq) {}
+ #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+ 
   /*
    * Account a single tick of cpu time.
    * @p: the process that the cpu time gets accounted to
@@@ -3624,6 -3794,11 +3773,11 @@@ void account_process_tick(struct task_s
         cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
         struct rq *rq = this_rq();
   
+       if (sched_clock_irqtime) {
+               irqtime_account_process_tick(p, user_tick, rq);
+               return;
+       }
+ 
         if (user_tick)
                 account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
         else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@@ -3649,6 -3824,12 +3803,12 @@@ void account_steal_ticks(unsigned long 
    */
   void account_idle_ticks(unsigned long ticks)
   {
+ 
+       if (sched_clock_irqtime) {
+               irqtime_account_idle_ticks(ticks);
+               return;
+       }
+ 
         account_idle_time(jiffies_to_cputime(ticks));
   }
   
@@@ -3968,6 -4149,9 +4128,6 @@@ need_resched_nonpreemptible
         rq->skip_clock_update = 0;
   
         if (likely(prev != next)) {
- -              sched_info_switch(prev, next);
- -              perf_event_task_sched_out(prev, next);
- -
                 rq->nr_switches++;
                 rq->curr = next;
                 ++*switch_count;
@@@ -4189,7 -4373,6 +4349,7 @@@ void __wake_up_locked_key(wait_queue_he
   {
         __wake_up_common(q, mode, 1, 0, key);
   }
+ +EXPORT_SYMBOL_GPL(__wake_up_locked_key);
   
   /**
    * __wake_up_sync_key - wake up threads blocked on a waitqueue.
@@@ -4547,11 -4730,10 +4707,10 @@@ void rt_mutex_setprio(struct task_struc
   
         if (running)
                 p->sched_class->set_curr_task(rq);
-       if (on_rq) {
+       if (on_rq)
                 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
   
-               check_class_changed(rq, p, prev_class, oldprio, running);
-       }
+       check_class_changed(rq, p, prev_class, oldprio);
         task_rq_unlock(rq, &flags);
   }
   
@@@ -4799,12 -4981,15 +4958,15 @@@ recheck
                             param->sched_priority > rlim_rtprio)
                                 return -EPERM;
                 }
+ 
                 /*
-                * Like positive nice levels, dont allow tasks to
-                * move out of SCHED_IDLE either:
+                * Treat SCHED_IDLE as nice 20. Only allow a switch to
+                * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
                  */
-               if (p->policy == SCHED_IDLE && policy != SCHED_IDLE)
-                       return -EPERM;
+               if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
+                       if (!can_nice(p, TASK_NICE(p)))
+                               return -EPERM;
+               }
   
                 /* can't change other user's priorities */
                 if (!check_same_owner(p))
@@@ -4879,11 -5064,10 +5041,10 @@@
   
         if (running)
                 p->sched_class->set_curr_task(rq);
-       if (on_rq) {
+       if (on_rq)
                 activate_task(rq, p, 0);
   
-               check_class_changed(rq, p, prev_class, oldprio, running);
-       }
+       check_class_changed(rq, p, prev_class, oldprio);
         __task_rq_unlock(rq);
         raw_spin_unlock_irqrestore(&p->pi_lock, flags);
   
@@@ -5300,6 -5484,65 +5461,65 @@@ void __sched yield(void
   }
   EXPORT_SYMBOL(yield);
   
+ /**
+  * yield_to - yield the current processor to another thread in
+  * your thread group, or accelerate that thread toward the
+  * processor it's on.
+  * @p: target task
+  * @preempt: handed on to the scheduling class's yield_to_task(); when
+  *           true and @p sits on a different runqueue, the task currently
+  *           running there is asked to reschedule
+  *
+  * It's the caller's job to ensure that the target task struct
+  * can't go away on us before we can do any checks.
+  *
+  * Nothing is done (and false is returned) when the current task's class
+  * has no yield_to_task method, when @p belongs to a different scheduling
+  * class, or when @p is already running or has a non-zero ->state.
+  *
+  * Returns true if we indeed boosted the target task.
+  */
+ bool __sched yield_to(struct task_struct *p, bool preempt)
+ {
+       struct task_struct *curr = current;
+       struct rq *rq, *p_rq;
+       unsigned long flags;
+       bool yielded = false;
+ 
+       local_irq_save(flags);
+       rq = this_rq();
+ 
+ again:
+       p_rq = task_rq(p);
+       double_rq_lock(rq, p_rq);
+       /* @p changed runqueue before both locks were held: drop and retry. */
+       if (task_rq(p) != p_rq) {
+               double_rq_unlock(rq, p_rq);
+               goto again;
+       }
+ 
+       if (!curr->sched_class->yield_to_task)
+               goto out;
+ 
+       if (curr->sched_class != p->sched_class)
+               goto out;
+ 
+       if (task_running(p_rq, p) || p->state)
+               goto out;
+ 
+       yielded = curr->sched_class->yield_to_task(rq, p, preempt);
+       if (yielded) {
+               schedstat_inc(rq, yld_count);
+               /*
+                * Make p's CPU reschedule; pick_next_entity takes care of
+                * fairness.
+                */
+               if (preempt && rq != p_rq)
+                       resched_task(p_rq->curr);
+       }
+ 
+ out:
+       double_rq_unlock(rq, p_rq);
+       local_irq_restore(flags);
+ 
+       /* Only give up the CPU once the locks are dropped and irqs restored. */
+       if (yielded)
+               schedule();
+ 
+       return yielded;
+ }
+ EXPORT_SYMBOL_GPL(yield_to);
+ 
   /*
    * This task is about to go to sleep on IO. Increment rq->nr_iowait so
    * that process accounting knows that this is a task in IO wait state.
@@@ -5548,7 -5791,7 +5768,7 @@@ void __cpuinit init_idle(struct task_st
          * The idle tasks have their own, simple scheduling class:
          */
         idle->sched_class = &idle_sched_class;
- -      ftrace_graph_init_task(idle);
+ +      ftrace_graph_init_idle_task(idle, cpu);
   }
   
   /*
@@@ -7773,6 -8016,10 +7993,10 @@@ static void init_cfs_rq(struct cfs_rq *
         INIT_LIST_HEAD(&cfs_rq->tasks);
   #ifdef CONFIG_FAIR_GROUP_SCHED
         cfs_rq->rq = rq;
+       /* allow initial update_cfs_load() to truncate */
+ #ifdef CONFIG_SMP
+       cfs_rq->load_stamp = 1;
+ #endif
   #endif
         cfs_rq->min_vruntime = (u64)(-(1LL << 20));
   }
@@@ -8086,6 -8333,8 +8310,8 @@@ EXPORT_SYMBOL(__might_sleep)
   #ifdef CONFIG_MAGIC_SYSRQ
   static void normalize_task(struct rq *rq, struct task_struct *p)
   {
+       const struct sched_class *prev_class = p->sched_class;
+       int old_prio = p->prio;
         int on_rq;
   
         on_rq = p->se.on_rq;
@@@ -8096,6 -8345,8 +8322,8 @@@
                 activate_task(rq, p, 0);
                 resched_task(rq->curr);
         }
+ 
+       check_class_changed(rq, p, prev_class, old_prio);
   }
   
   void normalize_rt_tasks(void)
@@@ -8487,7 -8738,7 +8715,7 @@@ int sched_group_set_shares(struct task_
                 /* Propagate contribution to hierarchy */
                 raw_spin_lock_irqsave(&rq->lock, flags);
                 for_each_sched_entity(se)
-                       update_cfs_shares(group_cfs_rq(se), 0);
+                       update_cfs_shares(group_cfs_rq(se));
                 raw_spin_unlock_irqrestore(&rq->lock, flags);
         }
   
@@@ -8861,8 -9112,7 +9089,8 @@@ cpu_cgroup_attach(struct cgroup_subsys 
   }
   
   static void
- -cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task)
+ +cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ +              struct cgroup *old_cgrp, struct task_struct *task)
   {
         /*
          * cgroup_exit() is called in the copy_process() failure path.
diff --combined kernel/sysctl.c

index 19b9d85,7b5eead..51054fe
--- 1/kernel/sysctl.c
--- 2/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@@ -194,9 -194,9 +194,9 @@@ static int sysrq_sysctl_handler(ctl_tab
   static struct ctl_table root_table[];
   static struct ctl_table_root sysctl_table_root;
   static struct ctl_table_header root_table_header = {
- -      .count = 1,
+ +      {{.count = 1,
         .ctl_table = root_table,
- -      .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
+ +      .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
         .root = &sysctl_table_root,
         .set = &sysctl_table_root.default_set,
   };
@@@ -361,20 -361,13 +361,13 @@@ static struct ctl_table kern_table[] = 
                 .mode           = 0644,
                 .proc_handler   = sched_rt_handler,
         },
-       {
-               .procname       = "sched_compat_yield",
-               .data           = &sysctl_sched_compat_yield,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
   #ifdef CONFIG_SCHED_AUTOGROUP
         {
                 .procname       = "sched_autogroup_enabled",
                 .data           = &sysctl_sched_autogroup_enabled,
                 .maxlen         = sizeof(unsigned int),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
                 .extra1         = &zero,
                 .extra2         = &one,
         },
@@@ -948,7 -941,7 +941,7 @@@
                 .data           = &sysctl_perf_event_sample_rate,
                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
                 .mode           = 0644,
- -              .proc_handler   = proc_dointvec,
+ +              .proc_handler   = perf_proc_update_handler,
         },
   #endif
   #ifdef CONFIG_KMEMCHECK
@@@ -1567,16 -1560,11 +1560,16 @@@ void sysctl_head_get(struct ctl_table_h
         spin_unlock(&sysctl_lock);
   }
   
+ +/* Callback handed to call_rcu(): free the ctl_table_header embedding @rcu. */
+ +static void free_head(struct rcu_head *rcu)
+ +{
+ +      struct ctl_table_header *head;
+ +
+ +      head = container_of(rcu, struct ctl_table_header, rcu);
+ +      kfree(head);
+ +}
+ +
   void sysctl_head_put(struct ctl_table_header *head)
   {
         spin_lock(&sysctl_lock);
         if (!--head->count)
- -              kfree(head);
+ +              call_rcu(&head->rcu, free_head);
         spin_unlock(&sysctl_lock);
   }
   
@@@ -1953,10 -1941,10 +1946,10 @@@ void unregister_sysctl_table(struct ctl
         start_unregistering(header);
         if (!--header->parent->count) {
                 WARN_ON(1);
- -              kfree(header->parent);
+ +              call_rcu(&header->parent->rcu, free_head);
         }
         if (!--header->count)
- -              kfree(header);
+ +              call_rcu(&header->rcu, free_head);
         spin_unlock(&sysctl_lock);
   }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 16 Mar 2011 01:37:30 +0000 (18:37 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 16 Mar 2011 01:37:30 +0000 (18:37 -0700)
		1	2
include/linux/interrupt.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sysctl.c	patch \|	diff1 \|	diff2 \|	blob \| history