From 59a9ed5f87b6e396aed1f6bf7d6496f7ba66c37a Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 23 Feb 2016 10:15:27 +0100 Subject: [PATCH] s390/dasd: reorder lcu and device lock Reorder lcu and device lock to get rid of the error-prone trylock mechanism. The locking order is lcu lock -> device lock. This protects against changes to the lcu device lists and enables us to iterate over the devices, take the cdev lock and make changes to the device structures. The complicated part is the summary unit check handler that gets an interrupt on one device of the lcu that leads to structural changes of the whole lcu itself. This work needs to be done even if devices on the lcu disappear. So a device independent worker is used. The old approach tried to update some lcu structures and set up the lcu worker in the interrupt context with the device lock held. But this forced the lock order "cdev lock -> lcu lock" that made it hard to have the lcu lock held and iterate over all devices and change them. The new approach is to schedule a device specific worker that gets out of the interrupt context and rid of the device lock for summary unit checks. This worker is able to take the lcu lock and schedule the lcu worker that updates all devices. The time between interrupt and worker execution is no problem because the devices in the lcu reject all I/O in this time with an appropriate error. The dasd driver can deal with this situation and re-drive the I/O later on. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_alias.c | 226 ++++++++++------------------------------ drivers/s390/block/dasd_eckd.c | 38 +++++-- drivers/s390/block/dasd_eckd.h | 3 +- drivers/s390/block/dasd_int.h | 2 + 4 files changed, 86 insertions(+), 183 deletions(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 17ad574..1e56018 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -317,17 +317,17 @@ static int _add_device_to_lcu(struct alias_lcu *lcu, struct alias_pav_group *group; struct dasd_uid uid; + spin_lock(get_ccwdev_lock(device->cdev)); private->uid.type = lcu->uac->unit[private->uid.real_unit_addr].ua_type; private->uid.base_unit_addr = lcu->uac->unit[private->uid.real_unit_addr].base_ua; uid = private->uid; - + spin_unlock(get_ccwdev_lock(device->cdev)); /* if we have no PAV anyway, we don't need to bother with PAV groups */ if (lcu->pav == NO_PAV) { list_move(&device->alias_list, &lcu->active_devices); return 0; } - group = _find_group(lcu, &uid); if (!group) { group = kzalloc(sizeof(*group), GFP_ATOMIC); @@ -397,130 +397,6 @@ suborder_not_supported(struct dasd_ccw_req *cqr) return 0; } -/* - * This function tries to lock all devices on an lcu via trylock - * return NULL on success otherwise return first failed device - */ -static struct dasd_device *_trylock_all_devices_on_lcu(struct alias_lcu *lcu, - struct dasd_device *pos) - -{ - struct alias_pav_group *pavgroup; - struct dasd_device *device; - - list_for_each_entry(device, &lcu->active_devices, alias_list) { - if (device == pos) - continue; - if (!spin_trylock(get_ccwdev_lock(device->cdev))) - return device; - } - list_for_each_entry(device, &lcu->inactive_devices, alias_list) { - if (device == pos) - continue; - if (!spin_trylock(get_ccwdev_lock(device->cdev))) - return device; - } - list_for_each_entry(pavgroup, &lcu->grouplist, group) { - list_for_each_entry(device, &pavgroup->baselist, alias_list) { - if (device == pos) - continue; - if (!spin_trylock(get_ccwdev_lock(device->cdev))) - return device; - } - list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { - if (device == pos) - continue; - if (!spin_trylock(get_ccwdev_lock(device->cdev))) - return device; - } - } - return NULL; -} - -/* - * unlock all devices except the one that is specified as pos - * stop if enddev is specified and reached - */ -static void _unlock_all_devices_on_lcu(struct alias_lcu *lcu, - struct dasd_device *pos, - struct dasd_device *enddev) - -{ - struct alias_pav_group *pavgroup; - struct dasd_device *device; - - list_for_each_entry(device, &lcu->active_devices, alias_list) { - if (device == pos) - continue; - if (device == enddev) - return; - spin_unlock(get_ccwdev_lock(device->cdev)); - } - list_for_each_entry(device, &lcu->inactive_devices, alias_list) { - if (device == pos) - continue; - if (device == enddev) - return; - spin_unlock(get_ccwdev_lock(device->cdev)); - } - list_for_each_entry(pavgroup, &lcu->grouplist, group) { - list_for_each_entry(device, &pavgroup->baselist, alias_list) { - if (device == pos) - continue; - if (device == enddev) - return; - spin_unlock(get_ccwdev_lock(device->cdev)); - } - list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { - if (device == pos) - continue; - if (device == enddev) - return; - spin_unlock(get_ccwdev_lock(device->cdev)); - } - } -} - -/* - * this function is needed because the locking order - * device lock -> lcu lock - * needs to be assured when iterating over devices in an LCU - * - * if a device is specified in pos then the device lock is already hold - */ -static void _trylock_and_lock_lcu_irqsave(struct alias_lcu *lcu, - struct dasd_device *pos, - unsigned long *flags) -{ - struct dasd_device *failed; - - do { - spin_lock_irqsave(&lcu->lock, *flags); - failed = _trylock_all_devices_on_lcu(lcu, pos); - if (failed) { - _unlock_all_devices_on_lcu(lcu, pos, failed); - spin_unlock_irqrestore(&lcu->lock, *flags); - cpu_relax(); - } - } while (failed); -} - -static void _trylock_and_lock_lcu(struct alias_lcu *lcu, - struct dasd_device *pos) -{ - struct dasd_device *failed; - - do { - spin_lock(&lcu->lock); - failed = _trylock_all_devices_on_lcu(lcu, pos); - if (failed) { - _unlock_all_devices_on_lcu(lcu, pos, failed); - spin_unlock(&lcu->lock); - cpu_relax(); - } - } while (failed); -} - static int read_unit_address_configuration(struct dasd_device *device, struct alias_lcu *lcu) { @@ -615,7 +491,7 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) if (rc) return rc; - _trylock_and_lock_lcu_irqsave(lcu, NULL, &flags); + spin_lock_irqsave(&lcu->lock, flags); lcu->pav = NO_PAV; for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) { switch (lcu->uac->unit[i].ua_type) { @@ -634,7 +510,6 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) alias_list) { _add_device_to_lcu(lcu, device, refdev); } - _unlock_all_devices_on_lcu(lcu, NULL, NULL); spin_unlock_irqrestore(&lcu->lock, flags); return 0; } @@ -722,8 +597,7 @@ int dasd_alias_add_device(struct dasd_device *device) lcu = private->lcu; rc = 0; - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - spin_lock(&lcu->lock); + spin_lock_irqsave(&lcu->lock, flags); if (!(lcu->flags & UPDATE_PENDING)) { rc = _add_device_to_lcu(lcu, device, device); if (rc) @@ -733,8 +607,7 @@ int dasd_alias_add_device(struct dasd_device *device) list_move(&device->alias_list, &lcu->active_devices); _schedule_lcu_update(lcu, device); } - spin_unlock(&lcu->lock); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); + spin_unlock_irqrestore(&lcu->lock, flags); return rc; } @@ -933,15 +806,27 @@ static void _stop_all_devices_on_lcu(struct alias_lcu *lcu) struct alias_pav_group *pavgroup; struct dasd_device *device; - list_for_each_entry(device, &lcu->active_devices, alias_list) + list_for_each_entry(device, &lcu->active_devices, alias_list) { + spin_lock(get_ccwdev_lock(device->cdev)); dasd_device_set_stop_bits(device, DASD_STOPPED_SU); - list_for_each_entry(device, &lcu->inactive_devices, alias_list) + spin_unlock(get_ccwdev_lock(device->cdev)); + } + list_for_each_entry(device, &lcu->inactive_devices, alias_list) { + spin_lock(get_ccwdev_lock(device->cdev)); dasd_device_set_stop_bits(device, DASD_STOPPED_SU); + spin_unlock(get_ccwdev_lock(device->cdev)); + } list_for_each_entry(pavgroup, &lcu->grouplist, group) { - list_for_each_entry(device, &pavgroup->baselist, alias_list) + list_for_each_entry(device, &pavgroup->baselist, alias_list) { + spin_lock(get_ccwdev_lock(device->cdev)); dasd_device_set_stop_bits(device, DASD_STOPPED_SU); - list_for_each_entry(device, &pavgroup->aliaslist, alias_list) + spin_unlock(get_ccwdev_lock(device->cdev)); + } + list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { + spin_lock(get_ccwdev_lock(device->cdev)); dasd_device_set_stop_bits(device, DASD_STOPPED_SU); + spin_unlock(get_ccwdev_lock(device->cdev)); + } } } @@ -950,15 +835,27 @@ static void _unstop_all_devices_on_lcu(struct alias_lcu *lcu) struct alias_pav_group *pavgroup; struct dasd_device *device; - list_for_each_entry(device, &lcu->active_devices, alias_list) + list_for_each_entry(device, &lcu->active_devices, alias_list) { + spin_lock(get_ccwdev_lock(device->cdev)); dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); - list_for_each_entry(device, &lcu->inactive_devices, alias_list) + spin_unlock(get_ccwdev_lock(device->cdev)); + } + list_for_each_entry(device, &lcu->inactive_devices, alias_list) { + spin_lock(get_ccwdev_lock(device->cdev)); dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); + spin_unlock(get_ccwdev_lock(device->cdev)); + } list_for_each_entry(pavgroup, &lcu->grouplist, group) { - list_for_each_entry(device, &pavgroup->baselist, alias_list) + list_for_each_entry(device, &pavgroup->baselist, alias_list) { + spin_lock(get_ccwdev_lock(device->cdev)); dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); - list_for_each_entry(device, &pavgroup->aliaslist, alias_list) + spin_unlock(get_ccwdev_lock(device->cdev)); + } + list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { + spin_lock(get_ccwdev_lock(device->cdev)); dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); + spin_unlock(get_ccwdev_lock(device->cdev)); + } } } @@ -984,48 +881,32 @@ static void summary_unit_check_handling_work(struct work_struct *work) spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); reset_summary_unit_check(lcu, device, suc_data->reason); - _trylock_and_lock_lcu_irqsave(lcu, NULL, &flags); + spin_lock_irqsave(&lcu->lock, flags); _unstop_all_devices_on_lcu(lcu); _restart_all_base_devices_on_lcu(lcu); /* 3. read new alias configuration */ _schedule_lcu_update(lcu, device); lcu->suc_data.device = NULL; dasd_put_device(device); - _unlock_all_devices_on_lcu(lcu, NULL, NULL); spin_unlock_irqrestore(&lcu->lock, flags); } -/* - * note: this will be called from int handler context (cdev locked) - */ -void dasd_alias_handle_summary_unit_check(struct dasd_device *device, - struct irb *irb) +void dasd_alias_handle_summary_unit_check(struct work_struct *work) { + struct dasd_device *device = container_of(work, struct dasd_device, + suc_work); struct dasd_eckd_private *private = device->private; struct alias_lcu *lcu; - char reason; - char *sense; - - sense = dasd_get_sense(irb); - if (sense) { - reason = sense[8]; - DBF_DEV_EVENT(DBF_NOTICE, device, "%s %x", - "eckd handle summary unit check: reason", reason); - } else { - DBF_DEV_EVENT(DBF_WARNING, device, "%s", - "eckd handle summary unit check:" - " no reason code available"); - return; - } + unsigned long flags; lcu = private->lcu; if (!lcu) { DBF_DEV_EVENT(DBF_WARNING, device, "%s", "device not ready to handle summary" " unit check (no lcu structure)"); - return; + goto out; } - _trylock_and_lock_lcu(lcu, device); + spin_lock_irqsave(&lcu->lock, flags); /* If this device is about to be removed just return and wait for * the next interrupt on a different device */ @@ -1033,27 +914,26 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device, DBF_DEV_EVENT(DBF_WARNING, device, "%s", "device is in offline processing," " don't do summary unit check handling"); - _unlock_all_devices_on_lcu(lcu, device, NULL); - spin_unlock(&lcu->lock); - return; + goto out_unlock; } if (lcu->suc_data.device) { /* already scheduled or running */ DBF_DEV_EVENT(DBF_WARNING, device, "%s", "previous instance of summary unit check worker" " still pending"); - _unlock_all_devices_on_lcu(lcu, device, NULL); - spin_unlock(&lcu->lock); - return ; + goto out_unlock; } _stop_all_devices_on_lcu(lcu); /* prepare for lcu_update */ - private->lcu->flags |= NEED_UAC_UPDATE | UPDATE_PENDING; - lcu->suc_data.reason = reason; + lcu->flags |= NEED_UAC_UPDATE | UPDATE_PENDING; + lcu->suc_data.reason = private->suc_reason; lcu->suc_data.device = device; dasd_get_device(device); - _unlock_all_devices_on_lcu(lcu, device, NULL); - spin_unlock(&lcu->lock); if (!schedule_work(&lcu->suc_data.worker)) dasd_put_device(device); +out_unlock: + spin_unlock_irqrestore(&lcu->lock, flags); +out: + clear_bit(DASD_FLAG_SUC, &device->flags); + dasd_put_device(device); }; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 75c032d..c1b4ae5 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1682,6 +1682,8 @@ dasd_eckd_check_characteristics(struct dasd_device *device) /* setup work queue for validate server*/ INIT_WORK(&device->kick_validate, dasd_eckd_do_validate_server); + /* setup work queue for summary unit check */ + INIT_WORK(&device->suc_work, dasd_alias_handle_summary_unit_check); if (!ccw_device_is_pathgroup(device->cdev)) { dev_warn(&device->cdev->dev, @@ -2549,14 +2551,6 @@ static void dasd_eckd_check_for_device_change(struct dasd_device *device, device->state == DASD_STATE_ONLINE && !test_bit(DASD_FLAG_OFFLINE, &device->flags) && !test_bit(DASD_FLAG_SUSPENDED, &device->flags)) { - /* - * the state change could be caused by an alias - * reassignment remove device from alias handling - * to prevent new requests from being scheduled on - * the wrong alias device - */ - dasd_alias_remove_device(device); - /* schedule worker to reload device */ dasd_reload_device(device); } @@ -2571,7 +2565,27 @@ static void dasd_eckd_check_for_device_change(struct dasd_device *device, /* summary unit check */ if ((sense[27] & DASD_SENSE_BIT_0) && (sense[7] == 0x0D) && (scsw_dstat(&irb->scsw) & DEV_STAT_UNIT_CHECK)) { - dasd_alias_handle_summary_unit_check(device, irb); + if (test_and_set_bit(DASD_FLAG_SUC, &device->flags)) { + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "eckd suc: device already notified"); + return; + } + sense = dasd_get_sense(irb); + if (!sense) { + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "eckd suc: no reason code available"); + clear_bit(DASD_FLAG_SUC, &device->flags); + return; + + } + private->suc_reason = sense[8]; + DBF_DEV_EVENT(DBF_NOTICE, device, "%s %x", + "eckd handle summary unit check: reason", + private->suc_reason); + dasd_get_device(device); + if (!schedule_work(&device->suc_work)) + dasd_put_device(device); + return; } @@ -4495,6 +4509,12 @@ static int dasd_eckd_reload_device(struct dasd_device *device) struct dasd_uid uid; unsigned long flags; + /* + * remove device from alias handling to prevent new requests + * from being scheduled on the wrong alias device + */ + dasd_alias_remove_device(device); + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); old_base = private->uid.base_unit_addr; spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h index f8f91ee..6d9a6d3 100644 --- a/drivers/s390/block/dasd_eckd.h +++ b/drivers/s390/block/dasd_eckd.h @@ -525,6 +525,7 @@ struct dasd_eckd_private { int count; u32 fcx_max_data; + char suc_reason; }; @@ -534,7 +535,7 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *); int dasd_alias_add_device(struct dasd_device *); int dasd_alias_remove_device(struct dasd_device *); struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *); -void dasd_alias_handle_summary_unit_check(struct dasd_device *, struct irb *); +void dasd_alias_handle_summary_unit_check(struct work_struct *); void dasd_eckd_reset_ccw_to_base_io(struct dasd_ccw_req *); void dasd_alias_lcu_setup_complete(struct dasd_device *); void dasd_alias_wait_for_lcu_setup(struct dasd_device *); diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 8de29be..0f0add9 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -470,6 +470,7 @@ struct dasd_device { struct work_struct restore_device; struct work_struct reload_device; struct work_struct kick_validate; + struct work_struct suc_work; struct timer_list timer; debug_info_t *debug_area; @@ -542,6 +543,7 @@ struct dasd_attention_data { #define DASD_FLAG_SAFE_OFFLINE_RUNNING 11 /* safe offline running */ #define DASD_FLAG_ABORTALL 12 /* Abort all noretry requests */ #define DASD_FLAG_PATH_VERIFY 13 /* Path verification worker running */ +#define DASD_FLAG_SUC 14 /* unhandled summary unit check */ #define DASD_SLEEPON_START_TAG ((void *) 1) #define DASD_SLEEPON_END_TAG ((void *) 2) -- 2.7.4