From 8bb7844286fb8c9fce6f65d8288aeb09d03a5e0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 May 2007 02:35:10 -0700 Subject: [PATCH] Add suspend-related notifications for CPU hotplug Since nonboot CPUs are now disabled after tasks and devices have been frozen and the CPU hotplug infrastructure is used for this purpose, we need special CPU hotplug notifications that will help the CPU-hotplug-aware subsystems distinguish normal CPU hotplug events from CPU hotplug events related to a system-wide suspend or resume operation in progress. This patch introduces such notifications and causes them to be used during suspend and resume transitions. It also changes all of the CPU-hotplug-aware subsystems to take these notifications into consideration (for now they are handled in the same way as the corresponding "normal" ones). [oleg@tv-sign.ru: cleanups] Signed-off-by: Rafael J. Wysocki Cc: Gautham R Shenoy Cc: Pavel Machek Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cpu-hotplug.txt | 9 ++++++-- arch/i386/kernel/cpu/intel_cacheinfo.c | 2 ++ arch/i386/kernel/cpu/mcheck/therm_throt.c | 2 ++ arch/i386/kernel/cpuid.c | 2 ++ arch/i386/kernel/microcode.c | 3 +++ arch/i386/kernel/msr.c | 2 ++ arch/ia64/kernel/err_inject.c | 2 ++ arch/ia64/kernel/palinfo.c | 2 ++ arch/ia64/kernel/salinfo.c | 2 ++ arch/ia64/kernel/topology.c | 2 ++ arch/powerpc/kernel/sysfs.c | 2 ++ arch/powerpc/mm/numa.c | 3 +++ arch/s390/appldata/appldata_base.c | 2 ++ arch/s390/kernel/smp.c | 2 ++ arch/x86_64/kernel/mce.c | 2 ++ arch/x86_64/kernel/mce_amd.c | 2 ++ arch/x86_64/kernel/vsyscall.c | 2 +- block/ll_rw_blk.c | 2 +- drivers/base/topology.c | 3 +++ drivers/cpufreq/cpufreq.c | 3 +++ drivers/cpufreq/cpufreq_stats.c | 2 ++ drivers/hwmon/coretemp.c | 2 ++ drivers/infiniband/hw/ehca/ehca_irq.c | 6 ++++++ drivers/kvm/kvm_main.c | 3 +++ fs/buffer.c | 2 +- fs/xfs/xfs_mount.c | 3 +++ include/linux/notifier.h | 12 +++++++++++ kernel/cpu.c 
| 34 ++++++++++++++++--------------- kernel/hrtimer.c | 2 ++ kernel/profile.c | 4 ++++ kernel/rcupdate.c | 2 ++ kernel/relay.c | 2 ++ kernel/sched.c | 10 +++++++++ kernel/softirq.c | 4 ++++ kernel/softlockup.c | 4 ++++ kernel/timer.c | 2 ++ kernel/workqueue.c | 2 ++ lib/radix-tree.c | 2 +- mm/page_alloc.c | 5 ++++- mm/slab.c | 6 ++++++ mm/slub.c | 2 ++ mm/swap.c | 2 +- mm/vmscan.c | 2 +- mm/vmstat.c | 3 +++ net/core/dev.c | 2 +- net/core/flow.c | 2 +- net/iucv/iucv.c | 6 ++++++ 47 files changed, 152 insertions(+), 27 deletions(-) diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index cc60d29..b6d24c2 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -217,14 +217,17 @@ Q: What happens when a CPU is being logically offlined? A: The following happen, listed in no particular order :-) - A notification is sent to in-kernel registered modules by sending an event - CPU_DOWN_PREPARE + CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the + CPU is being offlined while tasks are frozen due to a suspend operation in + progress - All process is migrated away from this outgoing CPU to a new CPU - All interrupts targeted to this CPU is migrated to a new CPU - timers/bottom half/task lets are also migrated to a new CPU - Once all services are migrated, kernel calls an arch specific routine __cpu_disable() to perform arch specific cleanup. - Once this is successful, an event for successful cleanup is sent by an event - CPU_DEAD. + CPU_DEAD (or CPU_DEAD_FROZEN if tasks are frozen due to a suspend while the + CPU is being offlined). "It is expected that each service cleans up when the CPU_DOWN_PREPARE notifier is called, when CPU_DEAD is called its expected there is nothing @@ -242,9 +245,11 @@ A: This is what you would need in your kernel code to receive notifications. 
switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: foobar_online_action(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: foobar_dead_action(cpu); break; } diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 80b4c5d..e5be819 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -733,9 +733,11 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, sys_dev = get_cpu_sysdev(cpu); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cache_add_dev(sys_dev); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: cache_remove_dev(sys_dev); break; } diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c index 065005c..5b0a040 100644 --- a/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -137,10 +137,12 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, mutex_lock(&therm_cpu_lock); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: err = thermal_throttle_add_dev(sys_dev); WARN_ON(err); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: thermal_throttle_remove_dev(sys_dev); break; } diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index eeae0d9..5c2faa1 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -169,9 +169,11 @@ static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long ac switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cpuid_device_create(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); break; } diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index cbe7ec8..7d934e4 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -775,10 +775,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) sys_dev = get_cpu_sysdev(cpu); 
switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: mc_sysdev_add(sys_dev); break; case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: mc_sysdev_remove(sys_dev); break; } diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 8cd0a91..0c1069b 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -153,9 +153,11 @@ static int msr_class_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: msr_device_create(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); break; } diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index d3e9f33..6a49600 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -236,9 +236,11 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, sys_dev = get_cpu_sysdev(cpu); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: err_inject_add_dev(sys_dev); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: err_inject_remove_dev(sys_dev); break; } diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index a71df9a..85829e2 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -975,9 +975,11 @@ static int palinfo_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: create_palinfo_proc_entries(hotcpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: remove_palinfo_proc_entries(hotcpu); break; } diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index a51f1d0..89f6b13 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -582,6 +582,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu struct salinfo_data *data; switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: spin_lock_irqsave(&data_saved_lock, flags); for (i = 0, data = 
salinfo_data; i < ARRAY_SIZE(salinfo_data); @@ -592,6 +593,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu spin_unlock_irqrestore(&data_saved_lock, flags); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: spin_lock_irqsave(&data_saved_lock, flags); for (i = 0, data = salinfo_data; i < ARRAY_SIZE(salinfo_data); diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 687500d..94ae3c8 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -412,9 +412,11 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, sys_dev = get_cpu_sysdev(cpu); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cache_add_dev(sys_dev); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: cache_remove_dev(sys_dev); break; } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index cae39d9..68991c2 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -342,10 +342,12 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: register_cpu_online(cpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + case CPU_DEAD_FROZEN: unregister_cpu_online(cpu); break; #endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b3a592b..de45aa8 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -252,12 +252,15 @@ static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: numa_setup_cpu(lcpu); ret = NOTIFY_OK; break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + case CPU_DEAD_FROZEN: case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: unmap_cpu_from_node(lcpu); break; ret = NOTIFY_OK; diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ee89b33..81a2b92 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -567,9 +567,11 
@@ appldata_cpu_notify(struct notifier_block *self, { switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: appldata_online_cpu((long) hcpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: appldata_offline_cpu((long) hcpu); break; default: diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b797702..09f028a 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -789,10 +789,12 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: if (sysdev_create_file(s, &attr_capability)) return NOTIFY_BAD; break; case CPU_DEAD: + case CPU_DEAD_FROZEN: sysdev_remove_file(s, &attr_capability); break; } diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 44216964..a14375d 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -720,9 +720,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: mce_create_device(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: mce_remove_device(cpu); break; } diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index d0bd5d6..03356e6 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c @@ -654,9 +654,11 @@ static int threshold_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: threshold_create_device(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: threshold_remove_device(cpu); break; default: diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index dc32cef..51d4c6f 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -327,7 +327,7 @@ static int __cpuinit cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) { long cpu = (long)arg; - if (action == CPU_ONLINE) + if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) 
smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); return NOTIFY_DONE; } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index df50657..cd54672 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3507,7 +3507,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action, * If a CPU goes away, splice its entries to the current CPU * and trigger a run of the softirq */ - if (action == CPU_DEAD) { + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { int cpu = (unsigned long) hcpu; local_irq_disable(); diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 067a9e8..8d8cdfe 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -126,10 +126,13 @@ static int __cpuinit topology_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: rc = topology_add_dev(cpu); break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: topology_remove_dev(cpu); break; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 893dbaf..eb37fba 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1685,9 +1685,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, if (sys_dev) { switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cpufreq_add_dev(sys_dev); break; case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: if (unlikely(lock_policy_rwsem_write(cpu))) BUG(); @@ -1699,6 +1701,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, __cpufreq_remove_dev(sys_dev); break; case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: cpufreq_add_dev(sys_dev); break; } diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index d1c7cac..d2f0cbd 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -313,9 +313,11 @@ static int cpufreq_stat_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + 
case CPU_ONLINE_FROZEN: cpufreq_update_policy(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: cpufreq_stats_free_table(cpu); break; } diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 03b1f65..75e3911 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -309,9 +309,11 @@ static int coretemp_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: coretemp_device_add(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: coretemp_device_remove(cpu); break; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index f284be1..82dda2f 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -745,6 +745,7 @@ static int comp_pool_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); if(!create_comp_task(pool, cpu)) { ehca_gen_err("Can't create comp_task for cpu: %x", cpu); @@ -752,24 +753,29 @@ static int comp_pool_callback(struct notifier_block *nfb, } break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); kthread_bind(cct->task, any_online_cpu(cpu_online_map)); destroy_comp_task(pool, cpu); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); kthread_bind(cct->task, cpu); wake_up_process(cct->task); break; case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); break; case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); destroy_comp_task(pool, cpu); take_over_work(pool, cpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 
c8b8cfa..0d89260 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2889,7 +2889,9 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, switch (val) { case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); decache_vcpus_on_cpu(cpu); @@ -2897,6 +2899,7 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, NULL, 0, 1); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, diff --git a/fs/buffer.c b/fs/buffer.c index fc2d763..aecd057 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2946,7 +2946,7 @@ static void buffer_exit_cpu(int cpu) static int buffer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - if (action == CPU_DEAD) + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) buffer_exit_cpu((unsigned long)hcpu); return NOTIFY_OK; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f5aa3ef..a96bde6 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1734,11 +1734,13 @@ xfs_icsb_cpu_notify( per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: /* Easy Case - initialize the area and locks, and * then rebalance when online does everything else for us. */ memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: xfs_icsb_lock(mp); xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); @@ -1746,6 +1748,7 @@ xfs_icsb_cpu_notify( xfs_icsb_unlock(mp); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: /* Disable all the counters, then fold the dead cpu's * count into the total on the global superblock and * re-enable the counters. 
*/ diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 1903e54..9431101 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -197,5 +197,17 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, #define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */ #define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */ +/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend + * operation in progress + */ +#define CPU_TASKS_FROZEN 0x0010 + +#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) +#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) +#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) +#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) +#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) +#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) + #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 28cb6c7..369d289 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -120,12 +120,13 @@ static int take_cpu_down(void *unused) } /* Requires cpu_add_remove_lock to be held */ -static int _cpu_down(unsigned int cpu) +static int _cpu_down(unsigned int cpu, int tasks_frozen) { int err, nr_calls = 0; struct task_struct *p; cpumask_t old_allowed, tmp; void *hcpu = (void *)(long)cpu; + unsigned long mod = tasks_frozen ? 
CPU_TASKS_FROZEN : 0; if (num_online_cpus() == 1) return -EBUSY; @@ -134,11 +135,11 @@ static int _cpu_down(unsigned int cpu) return -EINVAL; raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); - err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, + err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); if (err == NOTIFY_BAD) { - __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, hcpu, - nr_calls, NULL); + __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, + hcpu, nr_calls, NULL); printk("%s: attempt to take down CPU %u failed\n", __FUNCTION__, cpu); err = -EINVAL; @@ -157,7 +158,7 @@ static int _cpu_down(unsigned int cpu) if (IS_ERR(p) || cpu_online(cpu)) { /* CPU didn't die: tell everyone. Can't complain. */ - if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, + if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, hcpu) == NOTIFY_BAD) BUG(); @@ -176,7 +177,8 @@ static int _cpu_down(unsigned int cpu) __cpu_die(cpu); /* CPU is completely dead: tell everyone. Too late to complain. 
*/ - if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD, hcpu) == NOTIFY_BAD) + if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod, + hcpu) == NOTIFY_BAD) BUG(); check_for_tasks(cpu); @@ -186,8 +188,7 @@ out_thread: out_allowed: set_cpus_allowed(current, old_allowed); out_release: - raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, - (void *)(long)cpu); + raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); return err; } @@ -199,7 +200,7 @@ int cpu_down(unsigned int cpu) if (cpu_hotplug_disabled) err = -EBUSY; else - err = _cpu_down(cpu); + err = _cpu_down(cpu, 0); mutex_unlock(&cpu_add_remove_lock); return err; @@ -207,16 +208,17 @@ int cpu_down(unsigned int cpu) #endif /*CONFIG_HOTPLUG_CPU*/ /* Requires cpu_add_remove_lock to be held */ -static int __cpuinit _cpu_up(unsigned int cpu) +static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) { int ret, nr_calls = 0; void *hcpu = (void *)(long)cpu; + unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; if (cpu_online(cpu) || !cpu_present(cpu)) return -EINVAL; raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); - ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu, + ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); if (ret == NOTIFY_BAD) { printk("%s: attempt to bring up CPU %u failed\n", @@ -234,12 +236,12 @@ static int __cpuinit _cpu_up(unsigned int cpu) BUG_ON(!cpu_online(cpu)); /* Now call notifier in preparation. 
*/ - raw_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); + raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu); out_notify: if (ret != 0) __raw_notifier_call_chain(&cpu_chain, - CPU_UP_CANCELED, hcpu, nr_calls, NULL); + CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); return ret; @@ -253,7 +255,7 @@ int __cpuinit cpu_up(unsigned int cpu) if (cpu_hotplug_disabled) err = -EBUSY; else - err = _cpu_up(cpu); + err = _cpu_up(cpu, 0); mutex_unlock(&cpu_add_remove_lock); return err; @@ -283,7 +285,7 @@ int disable_nonboot_cpus(void) for_each_online_cpu(cpu) { if (cpu == first_cpu) continue; - error = _cpu_down(cpu); + error = _cpu_down(cpu, 1); if (!error) { cpu_set(cpu, frozen_cpus); printk("CPU%d is down\n", cpu); @@ -318,7 +320,7 @@ void enable_nonboot_cpus(void) suspend_cpu_hotplug = 1; printk("Enabling non-boot CPUs ...\n"); for_each_cpu_mask(cpu, frozen_cpus) { - error = _cpu_up(cpu); + error = _cpu_up(cpu, 1); if (!error) { printk("CPU%d is up\n", cpu); continue; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c9f4f04..23c03f4 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1411,11 +1411,13 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: init_hrtimers_cpu(cpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + case CPU_DEAD_FROZEN: clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); migrate_hrtimers(cpu); break; diff --git a/kernel/profile.c b/kernel/profile.c index 9bfadb2..cc91b9b 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -340,6 +340,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: node = cpu_to_node(cpu); per_cpu(cpu_profile_flip, cpu) = 0; if (!per_cpu(cpu_profile_hits, cpu)[1]) { @@ -365,10 +366,13 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, 
__free_page(page); return NOTIFY_BAD; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cpu_set(cpu, prof_cpu_mask); break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: cpu_clear(cpu, prof_cpu_mask); if (per_cpu(cpu_profile_hits, cpu)[0]) { page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 3554b76..2c2dd84 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -558,9 +558,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, long cpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: rcu_online_cpu(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: rcu_offline_cpu(cpu); break; default: diff --git a/kernel/relay.c b/kernel/relay.c index e804589..61a5049 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -484,6 +484,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, switch(action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: mutex_lock(&relay_channels_mutex); list_for_each_entry(chan, &relay_channels, list) { if (chan->buf[hotcpu]) @@ -500,6 +501,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, mutex_unlock(&relay_channels_mutex); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: /* No need to flush the cpu : will be flushed upon * final relay_flush() call. */ break; diff --git a/kernel/sched.c b/kernel/sched.c index fe1a9c2..799d23b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5394,6 +5394,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); if (IS_ERR(p)) return NOTIFY_BAD; @@ -5407,12 +5408,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: /* Strictly unneccessary, as first user will wake it. 
*/ wake_up_process(cpu_rq(cpu)->migration_thread); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: if (!cpu_rq(cpu)->migration_thread) break; /* Unbind it from offline cpu so it can run. Fall thru. */ @@ -5423,6 +5426,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DEAD: + case CPU_DEAD_FROZEN: migrate_live_tasks(cpu); rq = cpu_rq(cpu); kthread_stop(rq->migration_thread); @@ -6912,14 +6916,20 @@ static int update_sched_domains(struct notifier_block *nfb, { switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: detach_destroy_domains(&cpu_online_map); return NOTIFY_OK; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: case CPU_ONLINE: + case CPU_ONLINE_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: /* * Fall through and re-initialise the domains. */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 8b75008..0b9886a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -593,6 +593,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); if (IS_ERR(p)) { printk("ksoftirqd for %i failed\n", hotcpu); @@ -602,16 +603,19 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, per_cpu(ksoftirqd, hotcpu) = p; break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: wake_up_process(per_cpu(ksoftirqd, hotcpu)); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: if (!per_cpu(ksoftirqd, hotcpu)) break; /* Unbind so it can run. Fall thru. 
*/ kthread_bind(per_cpu(ksoftirqd, hotcpu), any_online_cpu(cpu_online_map)); case CPU_DEAD: + case CPU_DEAD_FROZEN: p = per_cpu(ksoftirqd, hotcpu); per_cpu(ksoftirqd, hotcpu) = NULL; kthread_stop(p); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 8fa7040..0131e29 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -146,6 +146,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: BUG_ON(per_cpu(watchdog_task, hotcpu)); p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); if (IS_ERR(p)) { @@ -157,16 +158,19 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) kthread_bind(p, hotcpu); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: wake_up_process(per_cpu(watchdog_task, hotcpu)); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: if (!per_cpu(watchdog_task, hotcpu)) break; /* Unbind so it can run. Fall thru. 
*/ kthread_bind(per_cpu(watchdog_task, hotcpu), any_online_cpu(cpu_online_map)); case CPU_DEAD: + case CPU_DEAD_FROZEN: p = per_cpu(watchdog_task, hotcpu); per_cpu(watchdog_task, hotcpu) = NULL; kthread_stop(p); diff --git a/kernel/timer.c b/kernel/timer.c index 58f6dd0..de85f84 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1293,11 +1293,13 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self, long cpu = (long)hcpu; switch(action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: if (init_timers_cpu(cpu) < 0) return NOTIFY_BAD; break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + case CPU_DEAD_FROZEN: migrate_timers(cpu); break; #endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b976ed8..fb56fed 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -799,6 +799,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, struct cpu_workqueue_struct *cwq; struct workqueue_struct *wq; + action &= ~CPU_TASKS_FROZEN; + switch (action) { case CPU_LOCK_ACQUIRE: mutex_lock(&workqueue_mutex); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d69ddbe..402eb4e 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1004,7 +1004,7 @@ static int radix_tree_callback(struct notifier_block *nfb, struct radix_tree_preload *rtp; /* Free per-cpu pool of perloaded nodes */ - if (action == CPU_DEAD) { + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { rtp = &per_cpu(radix_tree_preloads, cpu); while (rtp->nr) { kmem_cache_free(radix_tree_node_cachep, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6fd0b74..d53cbf8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2148,11 +2148,14 @@ static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: if (process_zones(cpu)) ret = NOTIFY_BAD; break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: free_zone_pagesets(cpu); break; default: @@ 
-3012,7 +3015,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, { int cpu = (unsigned long)hcpu; - if (action == CPU_DEAD) { + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { local_irq_disable(); __drain_pages(cpu); vm_events_fold_cpu(cpu); diff --git a/mm/slab.c b/mm/slab.c index 1a7a10d..6f3d6e2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1190,6 +1190,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, mutex_lock(&cache_chain_mutex); break; case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: /* * We need to do this right in the beginning since * alloc_arraycache's are going to use this list. @@ -1276,10 +1277,12 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, } break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: start_cpu_timer(cpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: /* * Shutdown cache reaper. Note that the cache_chain_mutex is * held so that if cache_reap() is invoked it cannot do @@ -1291,9 +1294,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, per_cpu(reap_work, cpu).work.func = NULL; break; case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: start_cpu_timer(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: /* * Even if all the cpus of a node are down, we don't free the * kmem_list3 of any cache. 
This to avoid a race between @@ -1305,6 +1310,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, /* fall thru */ #endif case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; struct array_cache *shared; diff --git a/mm/slub.c b/mm/slub.c index f7c120b..a581fa8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2514,7 +2514,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: for_all_slabs(__flush_cpu_slab, cpu); break; default: diff --git a/mm/swap.c b/mm/swap.c index 218c52a..d3cb966 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -488,7 +488,7 @@ static int cpu_swap_callback(struct notifier_block *nfb, long *committed; committed = &per_cpu(committed_space, (long)hcpu); - if (action == CPU_DEAD) { + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { atomic_add(*committed, &vm_committed_space); *committed = 0; __lru_add_drain((long)hcpu); diff --git a/mm/vmscan.c b/mm/vmscan.c index 1c8e75a1..1be5a63 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1528,7 +1528,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, pg_data_t *pgdat; cpumask_t mask; - if (action == CPU_ONLINE) { + if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) { for_each_online_pgdat(pgdat) { mask = node_to_cpumask(pgdat->node_id); if (any_online_cpu(mask) != NR_CPUS) diff --git a/mm/vmstat.c b/mm/vmstat.c index 6c488d6..9a66dc4 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -650,8 +650,11 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, { switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: refresh_zone_stat_thresholds(); break; default: diff --git a/net/core/dev.c b/net/core/dev.c index 4317c1b..8301e2a 100644 --- a/net/core/dev.c +++ 
b/net/core/dev.c @@ -3450,7 +3450,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd; - if (action != CPU_DEAD) + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) return NOTIFY_OK; local_irq_disable(); diff --git a/net/core/flow.c b/net/core/flow.c index 5d25697..0514305 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -338,7 +338,7 @@ static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { - if (action == CPU_DEAD) + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) __flow_cache_shrink((unsigned long)hcpu, 0); return NOTIFY_OK; } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index fb3faf7..b733306 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -556,6 +556,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: if (!percpu_populate(iucv_irq_data, sizeof(struct iucv_irq_data), GFP_KERNEL|GFP_DMA, cpu)) @@ -567,15 +568,20 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, } break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: percpu_depopulate(iucv_param, cpu); percpu_depopulate(iucv_irq_data, cpu); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); break; case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: cpumask = iucv_buffer_cpumask; cpu_clear(cpu, cpumask); if (cpus_empty(cpumask)) -- 2.7.4