x86, mce: always use separate work queue to run trigger
authorAndi Kleen <andi@firstfloor.org>
Thu, 12 Feb 2009 12:39:28 +0000 (13:39 +0100)
committerH. Peter Anvin <hpa@linux.intel.com>
Tue, 17 Feb 2009 23:32:41 +0000 (15:32 -0800)
Impact: Needed for bug fix in next patch

This relaxes the requirement that mce_notify_user has to run in process
context. Useful for future changes, but also leads to cleaner
behaviour now. Now instead mce_notify_user can be called directly
from interrupt (but not NMI) context.

The work queue only uses a single global work struct, which can be done safely
because it is always free to reuse before the trigger function is executed.
This way no events can be lost.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
arch/x86/kernel/cpu/mcheck/mce_64.c

index 25ccdbe..18b379c 100644 (file)
@@ -380,11 +380,17 @@ static void mcheck_timer(struct work_struct *work)
        schedule_delayed_work(&mcheck_work, next_interval);
 }
 
+static void mce_do_trigger(struct work_struct *work)
+{
+       call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
+}
+
+static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
+
 /*
- * This is only called from process context.  This is where we do
- * anything we need to alert userspace about new MCEs.  This is called
- * directly from the poller and also from entry.S and idle, thanks to
- * TIF_MCE_NOTIFY.
+ * Notify the user(s) about new machine check events.
+ * Can be called from interrupt context, but not from machine check/NMI
+ * context.
  */
 int mce_notify_user(void)
 {
@@ -394,9 +400,14 @@ int mce_notify_user(void)
                unsigned long now = jiffies;
 
                wake_up_interruptible(&mce_wait);
-               if (trigger[0])
-                       call_usermodehelper(trigger, trigger_argv, NULL,
-                                               UMH_NO_WAIT);
+
+               /*
+                * There is no risk of missing notifications because
+                * work_pending is always cleared before the function is
+                * executed.
+                */
+               if (trigger[0] && !work_pending(&mce_trigger_work))
+                       schedule_work(&mce_trigger_work);
 
                if (time_after_eq(now, last_print + (check_interval*HZ))) {
                        last_print = now;