s390/dasd: fix hanging device after request requeue
authorStefan Haberland <sth@linux.ibm.com>
Fri, 21 Jul 2023 19:36:46 +0000 (21:36 +0200)
committerJens Axboe <axboe@kernel.dk>
Mon, 24 Jul 2023 19:05:29 +0000 (13:05 -0600)
The DASD device driver has a function to requeue requests to the
blocklayer.
This function is used in various cases when basic settings for the device
have to be changed like High Performance Ficon related parameters or copy
pair settings.

The functions iterates over the device->ccw_queue and also removes the
requests from the block->ccw_queue.
In case the device is started on an alias device instead of the base
device it might be removed from the block->ccw_queue without having it
canceled properly before. This might lead to a hanging device since the
request is no longer on a queue and can not be handled properly.

Fix by iterating over the block->ccw_queue instead of the
device->ccw_queue. This will take care of all blocklayer related requests
and handle them on all associated DASD devices.

Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
Reviewed-by: Jan Hoeppner <hoeppner@linux.ibm.com>
Link: https://lore.kernel.org/r/20230721193647.3889634-4-sth@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/s390/block/dasd.c

index edcbf77..50a5ff7 100644 (file)
@@ -2943,41 +2943,32 @@ static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data)
  * Requeue a request back to the block request queue
  * only works for block requests
  */
-static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
+static void _dasd_requeue_request(struct dasd_ccw_req *cqr)
 {
-       struct dasd_block *block = cqr->block;
        struct request *req;
 
-       if (!block)
-               return -EINVAL;
        /*
         * If the request is an ERP request there is nothing to requeue.
         * This will be done with the remaining original request.
         */
        if (cqr->refers)
-               return 0;
+               return;
        spin_lock_irq(&cqr->dq->lock);
        req = (struct request *) cqr->callback_data;
        blk_mq_requeue_request(req, true);
        spin_unlock_irq(&cqr->dq->lock);
 
-       return 0;
+       return;
 }
 
-/*
- * Go through all request on the dasd_block request queue, cancel them
- * on the respective dasd_device, and return them to the generic
- * block layer.
- */
-static int dasd_flush_block_queue(struct dasd_block *block)
+static int _dasd_requests_to_flushqueue(struct dasd_block *block,
+                                       struct list_head *flush_queue)
 {
        struct dasd_ccw_req *cqr, *n;
-       int rc, i;
-       struct list_head flush_queue;
        unsigned long flags;
+       int rc, i;
 
-       INIT_LIST_HEAD(&flush_queue);
-       spin_lock_bh(&block->queue_lock);
+       spin_lock_irqsave(&block->queue_lock, flags);
        rc = 0;
 restart:
        list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) {
@@ -2992,13 +2983,32 @@ restart:
                 * is returned from the dasd_device layer.
                 */
                cqr->callback = _dasd_wake_block_flush_cb;
-               for (i = 0; cqr != NULL; cqr = cqr->refers, i++)
-                       list_move_tail(&cqr->blocklist, &flush_queue);
+               for (i = 0; cqr; cqr = cqr->refers, i++)
+                       list_move_tail(&cqr->blocklist, flush_queue);
                if (i > 1)
                        /* moved more than one request - need to restart */
                        goto restart;
        }
-       spin_unlock_bh(&block->queue_lock);
+       spin_unlock_irqrestore(&block->queue_lock, flags);
+
+       return rc;
+}
+
+/*
+ * Go through all request on the dasd_block request queue, cancel them
+ * on the respective dasd_device, and return them to the generic
+ * block layer.
+ */
+static int dasd_flush_block_queue(struct dasd_block *block)
+{
+       struct dasd_ccw_req *cqr, *n;
+       struct list_head flush_queue;
+       unsigned long flags;
+       int rc;
+
+       INIT_LIST_HEAD(&flush_queue);
+       rc = _dasd_requests_to_flushqueue(block, &flush_queue);
+
        /* Now call the callback function of flushed requests */
 restart_cb:
        list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) {
@@ -3881,75 +3891,36 @@ EXPORT_SYMBOL_GPL(dasd_generic_space_avail);
  */
 int dasd_generic_requeue_all_requests(struct dasd_device *device)
 {
+       struct dasd_block *block = device->block;
        struct list_head requeue_queue;
        struct dasd_ccw_req *cqr, *n;
-       struct dasd_ccw_req *refers;
        int rc;
 
-       INIT_LIST_HEAD(&requeue_queue);
-       spin_lock_irq(get_ccwdev_lock(device->cdev));
-       rc = 0;
-       list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
-               /* Check status and move request to flush_queue */
-               if (cqr->status == DASD_CQR_IN_IO) {
-                       rc = device->discipline->term_IO(cqr);
-                       if (rc) {
-                               /* unable to terminate requeust */
-                               dev_err(&device->cdev->dev,
-                                       "Unable to terminate request %p "
-                                       "on suspend\n", cqr);
-                               spin_unlock_irq(get_ccwdev_lock(device->cdev));
-                               dasd_put_device(device);
-                               return rc;
-                       }
-               }
-               list_move_tail(&cqr->devlist, &requeue_queue);
-       }
-       spin_unlock_irq(get_ccwdev_lock(device->cdev));
-
-       list_for_each_entry_safe(cqr, n, &requeue_queue, devlist) {
-               wait_event(dasd_flush_wq,
-                          (cqr->status != DASD_CQR_CLEAR_PENDING));
+       if (!block)
+               return 0;
 
-               /*
-                * requeue requests to blocklayer will only work
-                * for block device requests
-                */
-               if (_dasd_requeue_request(cqr))
-                       continue;
+       INIT_LIST_HEAD(&requeue_queue);
+       rc = _dasd_requests_to_flushqueue(block, &requeue_queue);
 
-               /* remove requests from device and block queue */
-               list_del_init(&cqr->devlist);
-               while (cqr->refers != NULL) {
-                       refers = cqr->refers;
-                       /* remove the request from the block queue */
-                       list_del(&cqr->blocklist);
-                       /* free the finished erp request */
-                       dasd_free_erp_request(cqr, cqr->memdev);
-                       cqr = refers;
+       /* Now call the callback function of flushed requests */
+restart_cb:
+       list_for_each_entry_safe(cqr, n, &requeue_queue, blocklist) {
+               wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED));
+               /* Process finished ERP request. */
+               if (cqr->refers) {
+                       spin_lock_bh(&block->queue_lock);
+                       __dasd_process_erp(block->base, cqr);
+                       spin_unlock_bh(&block->queue_lock);
+                       /* restart list_for_xx loop since dasd_process_erp
+                        * might remove multiple elements
+                        */
+                       goto restart_cb;
                }
-
-               /*
-                * _dasd_requeue_request already checked for a valid
-                * blockdevice, no need to check again
-                * all erp requests (cqr->refers) have a cqr->block
-                * pointer copy from the original cqr
-                */
+               _dasd_requeue_request(cqr);
                list_del_init(&cqr->blocklist);
                cqr->block->base->discipline->free_cp(
                        cqr, (struct request *) cqr->callback_data);
        }
-
-       /*
-        * if requests remain then they are internal request
-        * and go back to the device queue
-        */
-       if (!list_empty(&requeue_queue)) {
-               /* move freeze_queue to start of the ccw_queue */
-               spin_lock_irq(get_ccwdev_lock(device->cdev));
-               list_splice_tail(&requeue_queue, &device->ccw_queue);
-               spin_unlock_irq(get_ccwdev_lock(device->cdev));
-       }
        dasd_schedule_device_bh(device);
        return rc;
 }