s390/cio: introduce locking for register/unregister functions
authorVineeth Vijayan <vneethv@linux.ibm.com>
Fri, 11 Nov 2022 12:46:33 +0000 (13:46 +0100)
committerHeiko Carstens <hca@linux.ibm.com>
Tue, 31 Jan 2023 17:56:36 +0000 (18:56 +0100)
Unbinding an I/O subchannel with a child-CCW device in disconnected
state sometimes causes a kernel-panic. The race condition was seen
mostly during testing, when setting all the CHPIDs of a device to
offline and at the same time, the unbinding the I/O subchannel driver.

The kernel-panic occurs because of double delete, the I/O subchannel
driver calls device_del on the CCW device while another device_del
invocation for the same device is in-flight.  For instance, disabling
all the CHPIDs will trigger the ccw_device_remove function, which will
call a ccw_device_unregister(), which ends up calling the device_del()
which is asynchronous via cdev's todo workqueue. And unbinding the I/O
subchannel driver calls io_subchannel_remove() function which calls the
ccw_device_unregister() and device_del().

This double delete can be prevented by serializing all CCW device
registration/unregistration calls into the driver core. This patch
introduces a mutex which will be used for this purpose.

Signed-off-by: Vineeth Vijayan <vneethv@linux.ibm.com>
Reported-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
arch/s390/include/asm/ccwdev.h
drivers/s390/cio/device.c

index bd15968..91d2617 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/fcx.h>
 #include <asm/irq.h>
 #include <asm/schid.h>
+#include <linux/mutex.h>
 
 /* structs from asm/cio.h */
 struct irb;
@@ -87,6 +88,7 @@ struct ccw_device {
        spinlock_t *ccwlock;
 /* private: */
        struct ccw_device_private *private;     /* cio private information */
+       struct mutex reg_mutex;
 /* public: */
        struct ccw_device_id id;
        struct ccw_driver *drv;
index 9e0cf44..5418e60 100644 (file)
@@ -244,10 +244,13 @@ int ccw_device_is_orphan(struct ccw_device *cdev)
 
 static void ccw_device_unregister(struct ccw_device *cdev)
 {
+       mutex_lock(&cdev->reg_mutex);
        if (device_is_registered(&cdev->dev)) {
                /* Undo device_add(). */
                device_del(&cdev->dev);
        }
+       mutex_unlock(&cdev->reg_mutex);
+
        if (cdev->private->flags.initialized) {
                cdev->private->flags.initialized = 0;
                /* Release reference from device_initialize(). */
@@ -653,11 +656,13 @@ static void ccw_device_do_unbind_bind(struct ccw_device *cdev)
 {
        int ret;
 
+       mutex_lock(&cdev->reg_mutex);
        if (device_is_registered(&cdev->dev)) {
                device_release_driver(&cdev->dev);
                ret = device_attach(&cdev->dev);
                WARN_ON(ret == -ENODEV);
        }
+       mutex_unlock(&cdev->reg_mutex);
 }
 
 static void
@@ -740,6 +745,7 @@ static int io_subchannel_initialize_dev(struct subchannel *sch,
        INIT_LIST_HEAD(&priv->cmb_list);
        init_waitqueue_head(&priv->wait_q);
        timer_setup(&priv->timer, ccw_device_timeout, 0);
+       mutex_init(&cdev->reg_mutex);
 
        atomic_set(&priv->onoff, 0);
        cdev->ccwlock = sch->lock;
@@ -825,6 +831,7 @@ static void io_subchannel_register(struct ccw_device *cdev)
         * be registered). We need to reprobe since we may now have sense id
         * information.
         */
+       mutex_lock(&cdev->reg_mutex);
        if (device_is_registered(&cdev->dev)) {
                if (!cdev->drv) {
                        ret = device_reprobe(&cdev->dev);
@@ -847,12 +854,14 @@ static void io_subchannel_register(struct ccw_device *cdev)
                spin_lock_irqsave(sch->lock, flags);
                sch_set_cdev(sch, NULL);
                spin_unlock_irqrestore(sch->lock, flags);
+               mutex_unlock(&cdev->reg_mutex);
                /* Release initial device reference. */
                put_device(&cdev->dev);
                goto out_err;
        }
 out:
        cdev->private->flags.recog_done = 1;
+       mutex_unlock(&cdev->reg_mutex);
        wake_up(&cdev->private->wait_q);
 out_err:
        if (adjust_init_count && atomic_dec_and_test(&ccw_device_init_count))