[SCSI] libata, libsas: introduce sched_eh and end_eh port ops
authorDan Williams <dan.j.williams@intel.com>
Fri, 22 Jun 2012 06:25:27 +0000 (23:25 -0700)
committerJames Bottomley <JBottomley@Parallels.com>
Fri, 20 Jul 2012 07:58:45 +0000 (08:58 +0100)
When managing shost->host_eh_scheduled libata assumes that there is a
1:1 shost-to-ata_port relationship.  libsas creates a 1:N relationship
so it needs to manage host_eh_scheduled cumulatively at the host level.
The sched_eh and end_eh port port ops allow libsas to track when domain
devices enter/leave the "eh-pending" state under ha->lock (previously
named ha->state_lock, but it is no longer just a lock for ha->state
changes).

Since host_eh_scheduled indicates eh without backing commands pinning
the device it can be deallocated at any time.  Move the taking of the
domain_device reference under the port_lock to guarantee that the
ata_port stays around for the duration of eh.

Reviewed-by: Jacek Danecki <jacek.danecki@intel.com>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
drivers/ata/libata-core.c
drivers/ata/libata-eh.c
drivers/scsi/libsas/sas_ata.c
drivers/scsi/libsas/sas_discover.c
drivers/scsi/libsas/sas_event.c
drivers/scsi/libsas/sas_init.c
drivers/scsi/libsas/sas_scsi_host.c
include/linux/libata.h
include/scsi/libsas.h
include/scsi/sas_ata.h

index cece3a4..3fe1202 100644 (file)
@@ -80,6 +80,8 @@ const struct ata_port_operations ata_base_port_ops = {
        .prereset               = ata_std_prereset,
        .postreset              = ata_std_postreset,
        .error_handler          = ata_std_error_handler,
+       .sched_eh               = ata_std_sched_eh,
+       .end_eh                 = ata_std_end_eh,
 };
 
 const struct ata_port_operations sata_port_ops = {
@@ -6642,6 +6644,8 @@ struct ata_port_operations ata_dummy_port_ops = {
        .qc_prep                = ata_noop_qc_prep,
        .qc_issue               = ata_dummy_qc_issue,
        .error_handler          = ata_dummy_error_handler,
+       .sched_eh               = ata_std_sched_eh,
+       .end_eh                 = ata_std_end_eh,
 };
 
 const struct ata_port_info ata_dummy_port_info = {
index 6d53cf9..77fc806 100644 (file)
@@ -793,12 +793,12 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
                ata_for_each_link(link, ap, HOST_FIRST)
                        memset(&link->eh_info, 0, sizeof(link->eh_info));
 
-               /* Clear host_eh_scheduled while holding ap->lock such
-                * that if exception occurs after this point but
-                * before EH completion, SCSI midlayer will
+               /* end eh (clear host_eh_scheduled) while holding
+                * ap->lock such that if exception occurs after this
+                * point but before EH completion, SCSI midlayer will
                 * re-initiate EH.
                 */
-               host->host_eh_scheduled = 0;
+               ap->ops->end_eh(ap);
 
                spin_unlock_irqrestore(ap->lock, flags);
                ata_eh_release(ap);
@@ -986,16 +986,13 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
 }
 
 /**
- *     ata_port_schedule_eh - schedule error handling without a qc
- *     @ap: ATA port to schedule EH for
- *
- *     Schedule error handling for @ap.  EH will kick in as soon as
- *     all commands are drained.
+ * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
+ * @ap: ATA port to schedule EH for
  *
- *     LOCKING:
+ *     LOCKING: inherited from ata_port_schedule_eh
  *     spin_lock_irqsave(host lock)
  */
-void ata_port_schedule_eh(struct ata_port *ap)
+void ata_std_sched_eh(struct ata_port *ap)
 {
        WARN_ON(!ap->ops->error_handler);
 
@@ -1007,6 +1004,44 @@ void ata_port_schedule_eh(struct ata_port *ap)
 
        DPRINTK("port EH scheduled\n");
 }
+EXPORT_SYMBOL_GPL(ata_std_sched_eh);
+
+/**
+ * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
+ * @ap: ATA port to end EH for
+ *
+ * In the libata object model there is a 1:1 mapping of ata_port to
+ * shost, so host fields can be directly manipulated under ap->lock, in
+ * the libsas case we need to hold a lock at the ha->level to coordinate
+ * these events.
+ *
+ *     LOCKING:
+ *     spin_lock_irqsave(host lock)
+ */
+void ata_std_end_eh(struct ata_port *ap)
+{
+       struct Scsi_Host *host = ap->scsi_host;
+
+       host->host_eh_scheduled = 0;
+}
+EXPORT_SYMBOL(ata_std_end_eh);
+
+
+/**
+ *     ata_port_schedule_eh - schedule error handling without a qc
+ *     @ap: ATA port to schedule EH for
+ *
+ *     Schedule error handling for @ap.  EH will kick in as soon as
+ *     all commands are drained.
+ *
+ *     LOCKING:
+ *     spin_lock_irqsave(host lock)
+ */
+void ata_port_schedule_eh(struct ata_port *ap)
+{
+       /* see: ata_std_sched_eh, unless you know better */
+       ap->ops->sched_eh(ap);
+}
 
 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
 {
index d109cc3..b035acf 100644 (file)
@@ -523,6 +523,31 @@ static void sas_ata_set_dmamode(struct ata_port *ap, struct ata_device *ata_dev)
                i->dft->lldd_ata_set_dmamode(dev);
 }
 
+static void sas_ata_sched_eh(struct ata_port *ap)
+{
+       struct domain_device *dev = ap->private_data;
+       struct sas_ha_struct *ha = dev->port->ha;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ha->lock, flags);
+       if (!test_and_set_bit(SAS_DEV_EH_PENDING, &dev->state))
+               ha->eh_active++;
+       ata_std_sched_eh(ap);
+       spin_unlock_irqrestore(&ha->lock, flags);
+}
+
+void sas_ata_end_eh(struct ata_port *ap)
+{
+       struct domain_device *dev = ap->private_data;
+       struct sas_ha_struct *ha = dev->port->ha;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ha->lock, flags);
+       if (test_and_clear_bit(SAS_DEV_EH_PENDING, &dev->state))
+               ha->eh_active--;
+       spin_unlock_irqrestore(&ha->lock, flags);
+}
+
 static struct ata_port_operations sas_sata_ops = {
        .prereset               = ata_std_prereset,
        .hardreset              = sas_ata_hard_reset,
@@ -536,6 +561,8 @@ static struct ata_port_operations sas_sata_ops = {
        .port_start             = ata_sas_port_start,
        .port_stop              = ata_sas_port_stop,
        .set_dmamode            = sas_ata_set_dmamode,
+       .sched_eh               = sas_ata_sched_eh,
+       .end_eh                 = sas_ata_end_eh,
 };
 
 static struct ata_port_info sata_port_info = {
@@ -708,10 +735,6 @@ static void async_sas_ata_eh(void *data, async_cookie_t cookie)
        struct ata_port *ap = dev->sata_dev.ap;
        struct sas_ha_struct *ha = dev->port->ha;
 
-       /* hold a reference over eh since we may be racing with final
-        * remove once all commands are completed
-        */
-       kref_get(&dev->kref);
        sas_ata_printk(KERN_DEBUG, dev, "dev error handler\n");
        ata_scsi_port_error_handler(ha->core.shost, ap);
        sas_put_device(dev);
@@ -742,6 +765,13 @@ void sas_ata_strategy_handler(struct Scsi_Host *shost)
                list_for_each_entry(dev, &port->dev_list, dev_list_node) {
                        if (!dev_is_sata(dev))
                                continue;
+
+                       /* hold a reference over eh since we may be
+                        * racing with final remove once all commands
+                        * are completed
+                        */
+                       kref_get(&dev->kref);
+
                        async_schedule_domain(async_sas_ata_eh, dev, &async);
                }
                spin_unlock(&port->dev_list_lock);
index 629a086..ff497ac 100644 (file)
@@ -294,6 +294,8 @@ static void sas_unregister_common_dev(struct asd_sas_port *port, struct domain_d
 
        spin_lock_irq(&port->dev_list_lock);
        list_del_init(&dev->dev_list_node);
+       if (dev_is_sata(dev))
+               sas_ata_end_eh(dev->sata_dev.ap);
        spin_unlock_irq(&port->dev_list_lock);
 
        sas_put_device(dev);
@@ -488,9 +490,9 @@ static void sas_chain_event(int event, unsigned long *pending,
        if (!test_and_set_bit(event, pending)) {
                unsigned long flags;
 
-               spin_lock_irqsave(&ha->state_lock, flags);
+               spin_lock_irqsave(&ha->lock, flags);
                sas_chain_work(ha, sw);
-               spin_unlock_irqrestore(&ha->state_lock, flags);
+               spin_unlock_irqrestore(&ha->lock, flags);
        }
 }
 
index 4e4292d..789c4d8 100644 (file)
@@ -47,9 +47,9 @@ static void sas_queue_event(int event, unsigned long *pending,
        if (!test_and_set_bit(event, pending)) {
                unsigned long flags;
 
-               spin_lock_irqsave(&ha->state_lock, flags);
+               spin_lock_irqsave(&ha->lock, flags);
                sas_queue_work(ha, work);
-               spin_unlock_irqrestore(&ha->state_lock, flags);
+               spin_unlock_irqrestore(&ha->lock, flags);
        }
 }
 
@@ -61,18 +61,18 @@ void __sas_drain_work(struct sas_ha_struct *ha)
 
        set_bit(SAS_HA_DRAINING, &ha->state);
        /* flush submitters */
-       spin_lock_irq(&ha->state_lock);
-       spin_unlock_irq(&ha->state_lock);
+       spin_lock_irq(&ha->lock);
+       spin_unlock_irq(&ha->lock);
 
        drain_workqueue(wq);
 
-       spin_lock_irq(&ha->state_lock);
+       spin_lock_irq(&ha->lock);
        clear_bit(SAS_HA_DRAINING, &ha->state);
        list_for_each_entry_safe(sw, _sw, &ha->defer_q, drain_node) {
                list_del_init(&sw->drain_node);
                sas_queue_work(ha, sw);
        }
-       spin_unlock_irq(&ha->state_lock);
+       spin_unlock_irq(&ha->lock);
 }
 
 int sas_drain_work(struct sas_ha_struct *ha)
index 10cb5ae..6909fef 100644 (file)
@@ -114,7 +114,7 @@ int sas_register_ha(struct sas_ha_struct *sas_ha)
                sas_ha->lldd_queue_size = 128; /* Sanity */
 
        set_bit(SAS_HA_REGISTERED, &sas_ha->state);
-       spin_lock_init(&sas_ha->state_lock);
+       spin_lock_init(&sas_ha->lock);
        mutex_init(&sas_ha->drain_mutex);
        INIT_LIST_HEAD(&sas_ha->defer_q);
 
@@ -163,9 +163,9 @@ int sas_unregister_ha(struct sas_ha_struct *sas_ha)
         * events to be queued, and flush any in-progress drainers
         */
        mutex_lock(&sas_ha->drain_mutex);
-       spin_lock_irq(&sas_ha->state_lock);
+       spin_lock_irq(&sas_ha->lock);
        clear_bit(SAS_HA_REGISTERED, &sas_ha->state);
-       spin_unlock_irq(&sas_ha->state_lock);
+       spin_unlock_irq(&sas_ha->lock);
        __sas_drain_work(sas_ha);
        mutex_unlock(&sas_ha->drain_mutex);
 
@@ -411,9 +411,9 @@ static int queue_phy_reset(struct sas_phy *phy, int hard_reset)
        d->reset_result = 0;
        d->hard_reset = hard_reset;
 
-       spin_lock_irq(&ha->state_lock);
+       spin_lock_irq(&ha->lock);
        sas_queue_work(ha, &d->reset_work);
-       spin_unlock_irq(&ha->state_lock);
+       spin_unlock_irq(&ha->lock);
 
        rc = sas_drain_work(ha);
        if (rc == 0)
@@ -438,9 +438,9 @@ static int queue_phy_enable(struct sas_phy *phy, int enable)
        d->enable_result = 0;
        d->enable = enable;
 
-       spin_lock_irq(&ha->state_lock);
+       spin_lock_irq(&ha->lock);
        sas_queue_work(ha, &d->enable_work);
-       spin_unlock_irq(&ha->state_lock);
+       spin_unlock_irq(&ha->lock);
 
        rc = sas_drain_work(ha);
        if (rc == 0)
index f0b9b7b..a09da44 100644 (file)
@@ -667,16 +667,20 @@ static void sas_eh_handle_sas_errors(struct Scsi_Host *shost, struct list_head *
        goto out;
 }
 
+
 void sas_scsi_recover_host(struct Scsi_Host *shost)
 {
        struct sas_ha_struct *ha = SHOST_TO_SAS_HA(shost);
-       unsigned long flags;
        LIST_HEAD(eh_work_q);
+       int tries = 0;
+       bool retry;
 
-       spin_lock_irqsave(shost->host_lock, flags);
+retry:
+       tries++;
+       retry = true;
+       spin_lock_irq(shost->host_lock);
        list_splice_init(&shost->eh_cmd_q, &eh_work_q);
-       shost->host_eh_scheduled = 0;
-       spin_unlock_irqrestore(shost->host_lock, flags);
+       spin_unlock_irq(shost->host_lock);
 
        SAS_DPRINTK("Enter %s busy: %d failed: %d\n",
                    __func__, shost->host_busy, shost->host_failed);
@@ -710,8 +714,19 @@ out:
 
        scsi_eh_flush_done_q(&ha->eh_done_q);
 
-       SAS_DPRINTK("--- Exit %s: busy: %d failed: %d\n",
-                   __func__, shost->host_busy, shost->host_failed);
+       /* check if any new eh work was scheduled during the last run */
+       spin_lock_irq(&ha->lock);
+       if (ha->eh_active == 0) {
+               shost->host_eh_scheduled = 0;
+               retry = false;
+       }
+       spin_unlock_irq(&ha->lock);
+
+       if (retry)
+               goto retry;
+
+       SAS_DPRINTK("--- Exit %s: busy: %d failed: %d tries: %d\n",
+                   __func__, shost->host_busy, shost->host_failed, tries);
 }
 
 enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
index 6e887c7..53da442 100644 (file)
@@ -846,6 +846,8 @@ struct ata_port_operations {
        void (*error_handler)(struct ata_port *ap);
        void (*lost_interrupt)(struct ata_port *ap);
        void (*post_internal_cmd)(struct ata_queued_cmd *qc);
+       void (*sched_eh)(struct ata_port *ap);
+       void (*end_eh)(struct ata_port *ap);
 
        /*
         * Optional features
@@ -1167,6 +1169,8 @@ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
                      ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
                      ata_postreset_fn_t postreset);
 extern void ata_std_error_handler(struct ata_port *ap);
+extern void ata_std_sched_eh(struct ata_port *ap);
+extern void ata_std_end_eh(struct ata_port *ap);
 extern int ata_link_nr_enabled(struct ata_link *link);
 
 /*
index 10ce74f..814d8cb 100644 (file)
@@ -179,6 +179,7 @@ struct sata_device {
 enum {
        SAS_DEV_GONE,
        SAS_DEV_DESTROY,
+       SAS_DEV_EH_PENDING,
 };
 
 struct domain_device {
@@ -386,7 +387,8 @@ struct sas_ha_struct {
        struct list_head  defer_q; /* work queued while draining */
        struct mutex      drain_mutex;
        unsigned long     state;
-       spinlock_t        state_lock;
+       spinlock_t        lock;
+       int               eh_active;
 
        struct mutex disco_mutex;
 
index 77670e8..2dfbdaa 100644 (file)
@@ -45,6 +45,7 @@ void sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
 void sas_ata_schedule_reset(struct domain_device *dev);
 void sas_ata_wait_eh(struct domain_device *dev);
 void sas_probe_sata(struct asd_sas_port *port);
+void sas_ata_end_eh(struct ata_port *ap);
 #else
 
 
@@ -85,6 +86,10 @@ static inline int sas_get_ata_info(struct domain_device *dev, struct ex_phy *phy
 {
        return 0;
 }
+
+static inline void sas_ata_end_eh(struct ata_port *ap)
+{
+}
 #endif
 
 #endif /* _SAS_ATA_H_ */