scsi: ufs: Fix a race condition between ufshcd_abort() and eh_work()
author Can Guo <cang@codeaurora.org>
Wed, 2 Dec 2020 12:04:02 +0000 (04:04 -0800)
committer Martin K. Petersen <martin.petersen@oracle.com>
Mon, 7 Dec 2020 23:29:24 +0000 (18:29 -0500)
In the current task abort routine, if task abort happens to the device W-LUN,
the code directly jumps to ufshcd_eh_host_reset_handler() to perform a full
reset and restore then returns FAIL or SUCCESS. Commands sent to the device
W-LUN are most likely the SSU cmds sent during UFS PM operations. If such
SSU cmd enters task abort routine when ufshcd_eh_host_reset_handler()
flushes eh_work, it will get stuck there since err_handler is serialized
with PM operations.

In order to unblock the above call path, we merely clean up the lrb taken by
this cmd, queue the eh_work and return SUCCESS. Once the cmd is aborted,
the PM operation which sends out the cmd just errors out, then err_handler
shall be able to proceed with the full reset and restore.

In this scenario, the cmd is aborted even before it is actually cleared by
HW, so set the lrb->in_use flag to prevent subsequent cmds, including SCSI
cmds and dev cmds, from taking the lrb released from abort. The flag shall
eventually be cleared in __ufshcd_transfer_req_compl() invoked by the full
reset and restore from err_handler.

[mkp: conflict with event logging series]

Link: https://lore.kernel.org/r/1606910644-21185-3-git-send-email-cang@codeaurora.org
Reviewed-by: Asutosh Das <asutoshd@codeaurora.org>
Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/ufs/ufshcd.c
drivers/scsi/ufs/ufshcd.h

index b7a22a7..d94a376 100644 (file)
@@ -2558,6 +2558,14 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
                (hba->clk_gating.state != CLKS_ON));
 
        lrbp = &hba->lrb[tag];
+       if (unlikely(lrbp->in_use)) {
+               if (hba->pm_op_in_progress)
+                       set_host_byte(cmd, DID_BAD_TARGET);
+               else
+                       err = SCSI_MLQUEUE_HOST_BUSY;
+               ufshcd_release(hba);
+               goto out;
+       }
 
        WARN_ON(lrbp->cmd);
        lrbp->cmd = cmd;
@@ -2800,6 +2808,11 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 
        init_completion(&wait);
        lrbp = &hba->lrb[tag];
+       if (unlikely(lrbp->in_use)) {
+               err = -EBUSY;
+               goto out;
+       }
+
        WARN_ON(lrbp->cmd);
        err = ufshcd_compose_dev_cmd(hba, lrbp, cmd_type, tag);
        if (unlikely(err))
@@ -2816,6 +2829,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 
        err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout);
 
+out:
        ufshcd_add_query_upiu_trace(hba, tag,
                        err ? "query_complete_err" : "query_complete");
 
@@ -4980,9 +4994,11 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
        struct scsi_cmnd *cmd;
        int result;
        int index;
+       bool update_scaling = false;
 
        for_each_set_bit(index, &completed_reqs, hba->nutrs) {
                lrbp = &hba->lrb[index];
+               lrbp->in_use = false;
                lrbp->compl_time_stamp = ktime_get();
                cmd = lrbp->cmd;
                if (cmd) {
@@ -4995,15 +5011,17 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
                        /* Do not touch lrbp after scsi done */
                        cmd->scsi_done(cmd);
                        __ufshcd_release(hba);
+                       update_scaling = true;
                } else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE ||
                        lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) {
                        if (hba->dev_cmd.complete) {
                                ufshcd_add_command_trace(hba, index,
                                                "dev_complete");
                                complete(hba->dev_cmd.complete);
+                               update_scaling = true;
                        }
                }
-               if (ufshcd_is_clkscaling_supported(hba))
+               if (ufshcd_is_clkscaling_supported(hba) && update_scaling)
                        hba->clk_scaling.active_reqs--;
        }
 
@@ -6426,8 +6444,12 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 
        init_completion(&wait);
        lrbp = &hba->lrb[tag];
-       WARN_ON(lrbp->cmd);
+       if (unlikely(lrbp->in_use)) {
+               err = -EBUSY;
+               goto out;
+       }
 
+       WARN_ON(lrbp->cmd);
        lrbp->cmd = NULL;
        lrbp->sense_bufflen = 0;
        lrbp->sense_buffer = NULL;
@@ -6499,6 +6521,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
                }
        }
 
+out:
        blk_put_request(req);
 out_unlock:
        up_read(&hba->clk_scaling_lock);
@@ -6749,18 +6772,6 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
                BUG();
        }
 
-       /*
-        * Task abort to the device W-LUN is illegal. When this command
-        * will fail, due to spec violation, scsi err handling next step
-        * will be to send LU reset which, again, is a spec violation.
-        * To avoid these unnecessary/illegal step we skip to the last error
-        * handling stage: reset and restore.
-        */
-       if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN) {
-               ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, lrbp->lun);
-               return ufshcd_eh_host_reset_handler(cmd);
-       }
-
        ufshcd_hold(hba, false);
        reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
        /* If command is already aborted/completed, return SUCCESS */
@@ -6781,7 +6792,7 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
         * to reduce repeated printouts. For other aborted requests only print
         * basic details.
         */
-       scsi_print_command(hba->lrb[tag].cmd);
+       scsi_print_command(cmd);
        if (!hba->req_abort_count) {
                ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, tag);
                ufshcd_print_evt_hist(hba);
@@ -6800,6 +6811,29 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
                goto cleanup;
        }
 
+       /*
+        * Task abort to the device W-LUN is illegal. When this command
+        * will fail, due to spec violation, scsi err handling next step
+        * will be to send LU reset which, again, is a spec violation.
+        * To avoid these unnecessary/illegal steps, first we clean up
+        * the lrb taken by this cmd and mark the lrb as in_use, then
+        * queue the eh_work and bail.
+        */
+       if (lrbp->lun == UFS_UPIU_UFS_DEVICE_WLUN) {
+               ufshcd_update_evt_hist(hba, UFS_EVT_ABORT, lrbp->lun);
+               spin_lock_irqsave(host->host_lock, flags);
+               if (lrbp->cmd) {
+                       __ufshcd_transfer_req_compl(hba, (1UL << tag));
+                       __set_bit(tag, &hba->outstanding_reqs);
+                       lrbp->in_use = true;
+                       hba->force_reset = true;
+                       ufshcd_schedule_eh_work(hba);
+               }
+
+               spin_unlock_irqrestore(host->host_lock, flags);
+               goto out;
+       }
+
        /* Skip task abort in case previous aborts failed and report failure */
        if (lrbp->req_abort_skip)
                err = -EIO;
index 1d4d3f8..08c8a59 100644 (file)
@@ -193,6 +193,7 @@ struct ufs_pm_lvl_states {
  * @crypto_key_slot: the key slot to use for inline crypto (-1 if none)
  * @data_unit_num: the data unit number for the first block for inline crypto
  * @req_abort_skip: skip request abort task flag
+ * @in_use: indicates that this lrb is still in use
  */
 struct ufshcd_lrb {
        struct utp_transfer_req_desc *utr_descriptor_ptr;
@@ -222,6 +223,7 @@ struct ufshcd_lrb {
 #endif
 
        bool req_abort_skip;
+       bool in_use;
 };
 
 /**