Currently many console drivers for s390 rely on panic/reboot notifiers
to invoke callbacks on these events. The panic() function disables local
IRQs, secondary CPUs and preemption, so callbacks invoked on panic are
effectively running in atomic context.
Happens that most of these console callbacks from s390 doesn't take the
proper care with regards to atomic context, like taking spinlocks that
might be taken in other function/CPU and hence will cause a lockup
situation.
The goal for this patch is to improve the notifiers reliability, acting
on 4 console drivers, as detailed below:
(1) con3215: changed a regular spinlock to the trylock alternative.
(2) con3270: also changed a regular spinlock to its trylock counterpart,
but here we also have another problem: raw3270_activate_view() takes a
different spinlock. So, we worked a helper to validate if this other lock
is safe to acquire, and if so, raw3270_activate_view() should be safe.
Notice though that there is a functional change here: it's now possible
to continue the notifier code [reaching con3270_wait_write() and
con3270_rebuild_update()] without executing raw3270_activate_view().
(3) sclp: a global lock is used heavily in the functions called from
the notifier, so we added a check here - if the lock is taken already,
we just bail-out, preventing the lockup.
(4) sclp_vt220: same as (3), a lock validation was added to prevent the
potential lockup problem.
Besides (1)-(4), we also removed useless void functions, adding the
code called from the notifier inside its own body, and changed the
priority of such notifiers to execute late, since they are "heavyweight"
for the panic environment, so we aim to reduce risks here.
Changed return values to NOTIFY_DONE as well, the standard one.
Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Link: https://lore.kernel.org/r/20220427224924.592546-14-gpiccoli@igalia.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
}
/*
- * panic() calls con3215_flush through a panic_notifier
- * before the system enters a disabled, endless loop.
+ * The below function is called as a panic/reboot notifier before the
+ * system enters a disabled, endless loop.
+ *
+ * Notice we must use the spin_trylock() alternative, to prevent lockups
+ * in atomic context (panic routine runs with secondary CPUs, local IRQs
+ * and preemption disabled).
*/
-static void con3215_flush(void)
+static int con3215_notify(struct notifier_block *self,
+ unsigned long event, void *data)
{
struct raw3215_info *raw;
unsigned long flags;
raw = raw3215[0]; /* console 3215 is the first one */
- spin_lock_irqsave(get_ccwdev_lock(raw->cdev), flags);
+ if (!spin_trylock_irqsave(get_ccwdev_lock(raw->cdev), flags))
+ return NOTIFY_DONE;
raw3215_make_room(raw, RAW3215_BUFFER_SIZE);
spin_unlock_irqrestore(get_ccwdev_lock(raw->cdev), flags);
-}
-static int con3215_notify(struct notifier_block *self,
- unsigned long event, void *data)
-{
- con3215_flush();
- return NOTIFY_OK;
+ return NOTIFY_DONE;
}
static struct notifier_block on_panic_nb = {
.notifier_call = con3215_notify,
- .priority = 0,
+ .priority = INT_MIN + 1, /* run the callback late */
};
static struct notifier_block on_reboot_nb = {
.notifier_call = con3215_notify,
- .priority = 0,
+ .priority = INT_MIN + 1, /* run the callback late */
};
/*
}
/*
- * panic() calls con3270_flush through a panic_notifier
- * before the system enters a disabled, endless loop.
+ * The below function is called as a panic/reboot notifier before the
+ * system enters a disabled, endless loop.
+ *
+ * Notice we must use the spin_trylock() alternative, to prevent lockups
+ * in atomic context (panic routine runs with secondary CPUs, local IRQs
+ * and preemption disabled).
*/
-static void
-con3270_flush(void)
+static int con3270_notify(struct notifier_block *self,
+ unsigned long event, void *data)
{
struct con3270 *cp;
unsigned long flags;
cp = condev;
if (!cp->view.dev)
- return;
- raw3270_activate_view(&cp->view);
- spin_lock_irqsave(&cp->view.lock, flags);
+ return NOTIFY_DONE;
+ if (!raw3270_view_lock_unavailable(&cp->view))
+ raw3270_activate_view(&cp->view);
+ if (!spin_trylock_irqsave(&cp->view.lock, flags))
+ return NOTIFY_DONE;
con3270_wait_write(cp);
cp->nr_up = 0;
con3270_rebuild_update(cp);
con3270_wait_write(cp);
}
spin_unlock_irqrestore(&cp->view.lock, flags);
-}
-static int con3270_notify(struct notifier_block *self,
- unsigned long event, void *data)
-{
- con3270_flush();
- return NOTIFY_OK;
+ return NOTIFY_DONE;
}
static struct notifier_block on_panic_nb = {
.notifier_call = con3270_notify,
- .priority = 0,
+ .priority = INT_MIN + 1, /* run the callback late */
};
static struct notifier_block on_reboot_nb = {
.notifier_call = con3270_notify,
- .priority = 0,
+ .priority = INT_MIN + 1, /* run the callback late */
};
/*
}
/*
+ * This helper just validates that it is safe to activate a
+ * view in the panic() context, due to locking restrictions.
+ */
+int raw3270_view_lock_unavailable(struct raw3270_view *view)
+{
+ struct raw3270 *rp = view->dev;
+
+ if (!rp)
+ return -ENODEV;
+ if (spin_is_locked(get_ccwdev_lock(rp->cdev)))
+ return -EBUSY;
+ return 0;
+}
+
+/*
* Activate a view.
*/
int
};
int raw3270_add_view(struct raw3270_view *, struct raw3270_fn *, int, int);
+int raw3270_view_lock_unavailable(struct raw3270_view *view);
int raw3270_activate_view(struct raw3270_view *);
void raw3270_del_view(struct raw3270_view *);
void raw3270_deactivate_view(struct raw3270_view *);
}
/*
- * Make sure that all buffers will be flushed to the SCLP.
+ * This panic/reboot notifier makes sure that all buffers
+ * will be flushed to the SCLP.
*/
-static void
-sclp_console_flush(void)
+static int sclp_console_notify(struct notifier_block *self,
+ unsigned long event, void *data)
{
+ /*
+ * Perform the lock check before effectively getting the
+ * lock on sclp_conbuf_emit() / sclp_console_sync_queue()
+ * to prevent potential lockups in atomic context.
+ */
+ if (spin_is_locked(&sclp_con_lock))
+ return NOTIFY_DONE;
+
sclp_conbuf_emit();
sclp_console_sync_queue();
-}
-static int sclp_console_notify(struct notifier_block *self,
- unsigned long event, void *data)
-{
- sclp_console_flush();
- return NOTIFY_OK;
+ return NOTIFY_DONE;
}
static struct notifier_block on_panic_nb = {
.notifier_call = sclp_console_notify,
- .priority = 1,
+ .priority = INT_MIN + 1, /* run the callback late */
};
static struct notifier_block on_reboot_nb = {
.notifier_call = sclp_console_notify,
- .priority = 1,
+ .priority = INT_MIN + 1, /* run the callback late */
};
/*
#ifdef CONFIG_SCLP_VT220_CONSOLE
-static void __sclp_vt220_flush_buffer(void)
-{
- unsigned long flags;
-
- sclp_vt220_emit_current();
- spin_lock_irqsave(&sclp_vt220_lock, flags);
- del_timer(&sclp_vt220_timer);
- while (sclp_vt220_queue_running) {
- spin_unlock_irqrestore(&sclp_vt220_lock, flags);
- sclp_sync_wait();
- spin_lock_irqsave(&sclp_vt220_lock, flags);
- }
- spin_unlock_irqrestore(&sclp_vt220_lock, flags);
-}
-
static void
sclp_vt220_con_write(struct console *con, const char *buf, unsigned int count)
{
return sclp_vt220_driver;
}
+/*
+ * This panic/reboot notifier runs in atomic context, so
+ * locking restrictions apply to prevent potential lockups.
+ */
static int
sclp_vt220_notify(struct notifier_block *self,
unsigned long event, void *data)
{
- __sclp_vt220_flush_buffer();
- return NOTIFY_OK;
+ unsigned long flags;
+
+ if (spin_is_locked(&sclp_vt220_lock))
+ return NOTIFY_DONE;
+
+ sclp_vt220_emit_current();
+
+ spin_lock_irqsave(&sclp_vt220_lock, flags);
+ del_timer(&sclp_vt220_timer);
+ while (sclp_vt220_queue_running) {
+ spin_unlock_irqrestore(&sclp_vt220_lock, flags);
+ sclp_sync_wait();
+ spin_lock_irqsave(&sclp_vt220_lock, flags);
+ }
+ spin_unlock_irqrestore(&sclp_vt220_lock, flags);
+
+ return NOTIFY_DONE;
}
static struct notifier_block on_panic_nb = {
.notifier_call = sclp_vt220_notify,
- .priority = 1,
+ .priority = INT_MIN + 1, /* run the callback late */
};
static struct notifier_block on_reboot_nb = {
.notifier_call = sclp_vt220_notify,
- .priority = 1,
+ .priority = INT_MIN + 1, /* run the callback late */
};
/* Structure needed to register with printk */