scsi: hisi_sas: Prevent parallel FLR and controller reset
author: Qi Liu <liuqi115@huawei.com>
Wed, 15 Dec 2021 14:37:39 +0000 (22:37 +0800)
committer: Martin K. Petersen <martin.petersen@oracle.com>
Fri, 17 Dec 2021 03:59:58 +0000 (22:59 -0500)
If we issue a controller reset command while an FLR is executing, a hung
task may be found:

 Call trace:
  __switch_to+0x158/0x1cc
  __schedule+0x2e8/0x85c
  schedule+0x7c/0x110
  schedule_timeout+0x190/0x1cc
  __down+0x7c/0xd4
  down+0x5c/0x7c
  hisi_sas_task_exec+0x510/0x680 [hisi_sas_main]
  hisi_sas_queue_command+0x24/0x30 [hisi_sas_main]
  smp_execute_task_sg+0xf4/0x23c [libsas]
  sas_smp_phy_control+0x110/0x1e0 [libsas]
  transport_sas_phy_reset+0xc8/0x190 [libsas]
  phy_reset_work+0x2c/0x40 [libsas]
  process_one_work+0x1dc/0x48c
  worker_thread+0x15c/0x464
  kthread+0x160/0x170
  ret_from_fork+0x10/0x18

This is a race condition which occurs when the FLR completes first.

Here the host HISI_SAS_RESETTING_BIT flag gets out of sync, as
HISI_SAS_RESETTING_BIT is not always cleared with hisi_hba.sem held, so
now only set/unset HISI_SAS_RESETTING_BIT under hisi_hba.sem.

Link: https://lore.kernel.org/r/1639579061-179473-7-git-send-email-john.garry@huawei.com
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c

index 9779115..0e14f90 100644 (file)
@@ -1574,7 +1574,6 @@ void hisi_sas_controller_reset_prepare(struct hisi_hba *hisi_hba)
 {
        struct Scsi_Host *shost = hisi_hba->shost;
 
-       down(&hisi_hba->sem);
        hisi_hba->phy_state = hisi_hba->hw->get_phys_state(hisi_hba);
 
        scsi_block_requests(shost);
@@ -1599,9 +1598,9 @@ void hisi_sas_controller_reset_done(struct hisi_hba *hisi_hba)
        if (hisi_hba->reject_stp_links_msk)
                hisi_sas_terminate_stp_reject(hisi_hba);
        hisi_sas_reset_init_all_devices(hisi_hba);
-       up(&hisi_hba->sem);
        scsi_unblock_requests(shost);
        clear_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags);
+       up(&hisi_hba->sem);
 
        hisi_sas_rescan_topology(hisi_hba, hisi_hba->phy_state);
 }
@@ -1612,8 +1611,11 @@ static int hisi_sas_controller_prereset(struct hisi_hba *hisi_hba)
        if (!hisi_hba->hw->soft_reset)
                return -1;
 
-       if (test_and_set_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags))
+       down(&hisi_hba->sem);
+       if (test_and_set_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags)) {
+               up(&hisi_hba->sem);
                return -1;
+       }
 
        if (hisi_sas_debugfs_enable && hisi_hba->debugfs_itct[0].itct)
                hisi_hba->hw->debugfs_snapshot_regs(hisi_hba);
index 0ef6c21..11a44d9 100644 (file)
@@ -4848,6 +4848,7 @@ static void hisi_sas_reset_prepare_v3_hw(struct pci_dev *pdev)
        int rc;
 
        dev_info(dev, "FLR prepare\n");
+       down(&hisi_hba->sem);
        set_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags);
        hisi_sas_controller_reset_prepare(hisi_hba);