s390/smp: lost IPIs on cpu hotplug
authorHeiko Carstens <heiko.carstens@de.ibm.com>
Wed, 22 May 2013 08:24:37 +0000 (10:24 +0200)
committerMartin Schwidefsky <schwidefsky@de.ibm.com>
Mon, 27 May 2013 07:16:15 +0000 (09:16 +0200)
IPIs might be lost when a cpu gets brought offline:

When stop_machine executes its state machine there is a race window
for the state STOPMACHINE_DISABLE_IRQ where the to be brought offline
cpu might already have irqs disabled but a different cpu still may
have irqs enabled.
If the enabled cpu receives an interrupt and as a result sends an IPI
to the to be offlined cpu in its bottom halve context, the IPI won't
be noticed before the cpu is offline.

In fact the race window is much larger since there is no guarantee
when an IPI will be received.

To fix this check for enqueued but not yet received IPIs in the
cpu_disable() path and call the respective handlers before the cpu
is marked offline.

Reported-by: Juergen Doelle <juergen.doelle@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/kernel/smp.c

index 05674b6690019012f3acdbb86ea5325616df9f76..4f977d0d25c2d1b1af674314a0b3da263d602b5a 100644 (file)
@@ -428,34 +428,27 @@ void smp_stop_cpu(void)
  * This is the main routine where commands issued by other
  * cpus are handled.
  */
-static void do_ext_call_interrupt(struct ext_code ext_code,
-                                 unsigned int param32, unsigned long param64)
+static void smp_handle_ext_call(void)
 {
        unsigned long bits;
-       int cpu;
-
-       cpu = smp_processor_id();
-       if (ext_code.code == 0x1202)
-               inc_irq_stat(IRQEXT_EXC);
-       else
-               inc_irq_stat(IRQEXT_EMS);
-       /*
-        * handle bit signal external calls
-        */
-       bits = xchg(&pcpu_devices[cpu].ec_mask, 0);
 
+       /* handle bit signal external calls */
+       bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
        if (test_bit(ec_stop_cpu, &bits))
                smp_stop_cpu();
-
        if (test_bit(ec_schedule, &bits))
                scheduler_ipi();
-
        if (test_bit(ec_call_function, &bits))
                generic_smp_call_function_interrupt();
-
        if (test_bit(ec_call_function_single, &bits))
                generic_smp_call_function_single_interrupt();
+}
 
+static void do_ext_call_interrupt(struct ext_code ext_code,
+                                 unsigned int param32, unsigned long param64)
+{
+       inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS);
+       smp_handle_ext_call();
 }
 
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -760,6 +753,8 @@ int __cpu_disable(void)
 {
        unsigned long cregs[16];
 
+       /* Handle possible pending IPIs */
+       smp_handle_ext_call();
        set_cpu_online(smp_processor_id(), false);
        /* Disable pseudo page faults on this cpu. */
        pfault_fini();