xen: support suspend/resume in pvscsi frontend
authorJuergen Gross <jgross@suse.com>
Tue, 17 Feb 2015 07:02:49 +0000 (08:02 +0100)
committerDavid Vrabel <david.vrabel@citrix.com>
Mon, 16 Mar 2015 14:49:15 +0000 (14:49 +0000)
Up to now the pvscsi frontend hasn't supported domain suspend and
resume. When a domain with an assigned pvscsi device was suspended
and resumed again, it was not able to use the device any more: trying
to do so resulted in hanging processes.

Support suspend and resume of pvscsi devices.

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
drivers/scsi/xen-scsifront.c

index 34199d2..78d9506 100644 (file)
@@ -63,6 +63,7 @@
 
 #define VSCSIFRONT_OP_ADD_LUN  1
 #define VSCSIFRONT_OP_DEL_LUN  2
+#define VSCSIFRONT_OP_READD_LUN        3
 
 /* Tuning point. */
 #define VSCSIIF_DEFAULT_CMD_PER_LUN 10
@@ -113,8 +114,13 @@ struct vscsifrnt_info {
        DECLARE_BITMAP(shadow_free_bitmap, VSCSIIF_MAX_REQS);
        struct vscsifrnt_shadow *shadow[VSCSIIF_MAX_REQS];
 
+       /* Following items are protected by the host lock. */
        wait_queue_head_t wq_sync;
+       wait_queue_head_t wq_pause;
        unsigned int wait_ring_available:1;
+       unsigned int waiting_pause:1;
+       unsigned int pause:1;
+       unsigned callers;
 
        char dev_state_path[64];
        struct task_struct *curr;
@@ -274,31 +280,31 @@ static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
        wake_up(&shadow->wq_reset);
 }
 
-static int scsifront_cmd_done(struct vscsifrnt_info *info)
+static void scsifront_do_response(struct vscsifrnt_info *info,
+                                 struct vscsiif_response *ring_rsp)
+{
+       if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
+                test_bit(ring_rsp->rqid, info->shadow_free_bitmap),
+                "illegal rqid %u returned by backend!\n", ring_rsp->rqid))
+               return;
+
+       if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB)
+               scsifront_cdb_cmd_done(info, ring_rsp);
+       else
+               scsifront_sync_cmd_done(info, ring_rsp);
+}
+
+static int scsifront_ring_drain(struct vscsifrnt_info *info)
 {
        struct vscsiif_response *ring_rsp;
        RING_IDX i, rp;
        int more_to_do = 0;
-       unsigned long flags;
-
-       spin_lock_irqsave(info->host->host_lock, flags);
 
        rp = info->ring.sring->rsp_prod;
        rmb();  /* ordering required respective to dom0 */
        for (i = info->ring.rsp_cons; i != rp; i++) {
-
                ring_rsp = RING_GET_RESPONSE(&info->ring, i);
-
-               if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
-                        test_bit(ring_rsp->rqid, info->shadow_free_bitmap),
-                        "illegal rqid %u returned by backend!\n",
-                        ring_rsp->rqid))
-                       continue;
-
-               if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB)
-                       scsifront_cdb_cmd_done(info, ring_rsp);
-               else
-                       scsifront_sync_cmd_done(info, ring_rsp);
+               scsifront_do_response(info, ring_rsp);
        }
 
        info->ring.rsp_cons = i;
@@ -308,6 +314,18 @@ static int scsifront_cmd_done(struct vscsifrnt_info *info)
        else
                info->ring.sring->rsp_event = i + 1;
 
+       return more_to_do;
+}
+
+static int scsifront_cmd_done(struct vscsifrnt_info *info)
+{
+       int more_to_do;
+       unsigned long flags;
+
+       spin_lock_irqsave(info->host->host_lock, flags);
+
+       more_to_do = scsifront_ring_drain(info);
+
        info->wait_ring_available = 0;
 
        spin_unlock_irqrestore(info->host->host_lock, flags);
@@ -328,6 +346,24 @@ static irqreturn_t scsifront_irq_fn(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static void scsifront_finish_all(struct vscsifrnt_info *info)
+{
+       unsigned i;
+       struct vscsiif_response resp;
+
+       scsifront_ring_drain(info);
+
+       for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
+               if (test_bit(i, info->shadow_free_bitmap))
+                       continue;
+               resp.rqid = i;
+               resp.sense_len = 0;
+               resp.rslt = DID_RESET << 16;
+               resp.residual_len = 0;
+               scsifront_do_response(info, &resp);
+       }
+}
+
 static int map_data_for_request(struct vscsifrnt_info *info,
                                struct scsi_cmnd *sc,
                                struct vscsiif_request *ring_req,
@@ -475,6 +511,27 @@ static struct vscsiif_request *scsifront_command2ring(
        return ring_req;
 }
 
+static int scsifront_enter(struct vscsifrnt_info *info)
+{
+       if (info->pause)
+               return 1;
+       info->callers++;
+       return 0;
+}
+
+static void scsifront_return(struct vscsifrnt_info *info)
+{
+       info->callers--;
+       if (info->callers)
+               return;
+
+       if (!info->waiting_pause)
+               return;
+
+       info->waiting_pause = 0;
+       wake_up(&info->wq_pause);
+}
+
 static int scsifront_queuecommand(struct Scsi_Host *shost,
                                  struct scsi_cmnd *sc)
 {
@@ -486,6 +543,10 @@ static int scsifront_queuecommand(struct Scsi_Host *shost,
        uint16_t rqid;
 
        spin_lock_irqsave(shost->host_lock, flags);
+       if (scsifront_enter(info)) {
+               spin_unlock_irqrestore(shost->host_lock, flags);
+               return SCSI_MLQUEUE_HOST_BUSY;
+       }
        if (RING_FULL(&info->ring))
                goto busy;
 
@@ -505,6 +566,7 @@ static int scsifront_queuecommand(struct Scsi_Host *shost,
        if (err < 0) {
                pr_debug("%s: err %d\n", __func__, err);
                scsifront_put_rqid(info, rqid);
+               scsifront_return(info);
                spin_unlock_irqrestore(shost->host_lock, flags);
                if (err == -ENOMEM)
                        return SCSI_MLQUEUE_HOST_BUSY;
@@ -514,11 +576,13 @@ static int scsifront_queuecommand(struct Scsi_Host *shost,
        }
 
        scsifront_do_request(info);
+       scsifront_return(info);
        spin_unlock_irqrestore(shost->host_lock, flags);
 
        return 0;
 
 busy:
+       scsifront_return(info);
        spin_unlock_irqrestore(shost->host_lock, flags);
        pr_debug("%s: busy\n", __func__);
        return SCSI_MLQUEUE_HOST_BUSY;
@@ -549,7 +613,7 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act)
                        if (ring_req)
                                break;
                }
-               if (err) {
+               if (err || info->pause) {
                        spin_unlock_irq(host->host_lock);
                        kfree(shadow);
                        return FAILED;
@@ -561,6 +625,11 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act)
                spin_lock_irq(host->host_lock);
        }
 
+       if (scsifront_enter(info)) {
+               spin_unlock_irq(host->host_lock);
+               return FAILED;
+       }
+
        ring_req->act = act;
        ring_req->ref_rqid = s->rqid;
 
@@ -587,6 +656,7 @@ static int scsifront_action_handler(struct scsi_cmnd *sc, uint8_t act)
                err = FAILED;
        }
 
+       scsifront_return(info);
        spin_unlock_irq(host->host_lock);
        return err;
 }
@@ -698,6 +768,13 @@ free_gnttab:
        return err;
 }
 
+static void scsifront_free_ring(struct vscsifrnt_info *info)
+{
+       unbind_from_irqhandler(info->irq, info);
+       gnttab_end_foreign_access(info->ring_ref, 0,
+                                 (unsigned long)info->ring.sring);
+}
+
 static int scsifront_init_ring(struct vscsifrnt_info *info)
 {
        struct xenbus_device *dev = info->dev;
@@ -744,9 +821,7 @@ again:
 fail:
        xenbus_transaction_end(xbt, 1);
 free_sring:
-       unbind_from_irqhandler(info->irq, info);
-       gnttab_end_foreign_access(info->ring_ref, 0,
-                                 (unsigned long)info->ring.sring);
+       scsifront_free_ring(info);
 
        return err;
 }
@@ -779,6 +854,7 @@ static int scsifront_probe(struct xenbus_device *dev,
        }
 
        init_waitqueue_head(&info->wq_sync);
+       init_waitqueue_head(&info->wq_pause);
        spin_lock_init(&info->shadow_lock);
 
        snprintf(name, TASK_COMM_LEN, "vscsiif.%d", host->host_no);
@@ -802,13 +878,60 @@ static int scsifront_probe(struct xenbus_device *dev,
        return 0;
 
 free_sring:
-       unbind_from_irqhandler(info->irq, info);
-       gnttab_end_foreign_access(info->ring_ref, 0,
-                                 (unsigned long)info->ring.sring);
+       scsifront_free_ring(info);
        scsi_host_put(host);
        return err;
 }
 
+static int scsifront_resume(struct xenbus_device *dev)
+{
+       struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
+       struct Scsi_Host *host = info->host;
+       int err;
+
+       spin_lock_irq(host->host_lock);
+
+       /* Finish all still pending commands. */
+       scsifront_finish_all(info);
+
+       spin_unlock_irq(host->host_lock);
+
+       /* Reconnect to dom0. */
+       scsifront_free_ring(info);
+       err = scsifront_init_ring(info);
+       if (err) {
+               dev_err(&dev->dev, "fail to resume %d\n", err);
+               scsi_host_put(host);
+               return err;
+       }
+
+       xenbus_switch_state(dev, XenbusStateInitialised);
+
+       return 0;
+}
+
+static int scsifront_suspend(struct xenbus_device *dev)
+{
+       struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
+       struct Scsi_Host *host = info->host;
+       int err = 0;
+
+       /* No new commands for the backend. */
+       spin_lock_irq(host->host_lock);
+       info->pause = 1;
+       while (info->callers && !err) {
+               info->waiting_pause = 1;
+               info->wait_ring_available = 0;
+               spin_unlock_irq(host->host_lock);
+               wake_up(&info->wq_sync);
+               err = wait_event_interruptible(info->wq_pause,
+                                              !info->waiting_pause);
+               spin_lock_irq(host->host_lock);
+       }
+       spin_unlock_irq(host->host_lock);
+       return err;
+}
+
 static int scsifront_remove(struct xenbus_device *dev)
 {
        struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
@@ -823,10 +946,7 @@ static int scsifront_remove(struct xenbus_device *dev)
        }
        mutex_unlock(&scsifront_mutex);
 
-       gnttab_end_foreign_access(info->ring_ref, 0,
-                                 (unsigned long)info->ring.sring);
-       unbind_from_irqhandler(info->irq, info);
-
+       scsifront_free_ring(info);
        scsi_host_put(info->host);
 
        return 0;
@@ -919,6 +1039,12 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
                                scsi_device_put(sdev);
                        }
                        break;
+               case VSCSIFRONT_OP_READD_LUN:
+                       if (device_state == XenbusStateConnected)
+                               xenbus_printf(XBT_NIL, dev->nodename,
+                                             info->dev_state_path,
+                                             "%d", XenbusStateConnected);
+                       break;
                default:
                        break;
                }
@@ -932,21 +1058,29 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
 static void scsifront_read_backend_params(struct xenbus_device *dev,
                                          struct vscsifrnt_info *info)
 {
-       unsigned int sg_grant;
+       unsigned int sg_grant, nr_segs;
        int ret;
        struct Scsi_Host *host = info->host;
 
        ret = xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg-grant", "%u",
                           &sg_grant);
-       if (ret == 1 && sg_grant) {
-               sg_grant = min_t(unsigned int, sg_grant, SG_ALL);
-               sg_grant = max_t(unsigned int, sg_grant, VSCSIIF_SG_TABLESIZE);
-               host->sg_tablesize = min_t(unsigned int, sg_grant,
+       if (ret != 1)
+               sg_grant = 0;
+       nr_segs = min_t(unsigned int, sg_grant, SG_ALL);
+       nr_segs = max_t(unsigned int, nr_segs, VSCSIIF_SG_TABLESIZE);
+       nr_segs = min_t(unsigned int, nr_segs,
                        VSCSIIF_SG_TABLESIZE * PAGE_SIZE /
                        sizeof(struct scsiif_request_segment));
-               host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
-       }
-       dev_info(&dev->dev, "using up to %d SG entries\n", host->sg_tablesize);
+
+       if (!info->pause && sg_grant)
+               dev_info(&dev->dev, "using up to %d SG entries\n", nr_segs);
+       else if (info->pause && nr_segs < host->sg_tablesize)
+               dev_warn(&dev->dev,
+                        "SG entries decreased from %d to %u - device may not work properly anymore\n",
+                        host->sg_tablesize, nr_segs);
+
+       host->sg_tablesize = nr_segs;
+       host->max_sectors = (nr_segs - 1) * PAGE_SIZE / 512;
 }
 
 static void scsifront_backend_changed(struct xenbus_device *dev,
@@ -965,6 +1099,14 @@ static void scsifront_backend_changed(struct xenbus_device *dev,
 
        case XenbusStateConnected:
                scsifront_read_backend_params(dev, info);
+
+               if (info->pause) {
+                       scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_READD_LUN);
+                       xenbus_switch_state(dev, XenbusStateConnected);
+                       info->pause = 0;
+                       return;
+               }
+
                if (xenbus_read_driver_state(dev->nodename) ==
                    XenbusStateInitialised)
                        scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
@@ -1002,6 +1144,8 @@ static struct xenbus_driver scsifront_driver = {
        .ids                    = scsifront_ids,
        .probe                  = scsifront_probe,
        .remove                 = scsifront_remove,
+       .resume                 = scsifront_resume,
+       .suspend                = scsifront_suspend,
        .otherend_changed       = scsifront_backend_changed,
 };