scsi: ufs: core: Fix a race condition related to device management
authorBart Van Assche <bvanassche@acm.org>
Wed, 20 Jul 2022 17:02:23 +0000 (10:02 -0700)
committerMartin K. Petersen <martin.petersen@oracle.com>
Wed, 27 Jul 2022 01:59:29 +0000 (21:59 -0400)
If a device management command completion happens after
wait_for_completion_timeout() times out and before ufshcd_clear_cmds() is
called, then the completion code may crash on the complete() call in
__ufshcd_transfer_req_compl().

Fix the following crash:

  Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
  Call trace:
   complete+0x64/0x178
   __ufshcd_transfer_req_compl+0x30c/0x9c0
   ufshcd_poll+0xf0/0x208
   ufshcd_sl_intr+0xb8/0xf0
   ufshcd_intr+0x168/0x2f4
   __handle_irq_event_percpu+0xa0/0x30c
   handle_irq_event+0x84/0x178
   handle_fasteoi_irq+0x150/0x2e8
   __handle_domain_irq+0x114/0x1e4
   gic_handle_irq.31846+0x58/0x300
   el1_irq+0xe4/0x1c0
   efi_header_end+0x110/0x680
   __irq_exit_rcu+0x108/0x124
   __handle_domain_irq+0x118/0x1e4
   gic_handle_irq.31846+0x58/0x300
   el1_irq+0xe4/0x1c0
   cpuidle_enter_state+0x3ac/0x8c4
   do_idle+0x2fc/0x55c
   cpu_startup_entry+0x84/0x90
   kernel_init+0x0/0x310
   start_kernel+0x0/0x608
   start_kernel+0x4ec/0x608

Link: https://lore.kernel.org/r/20220720170228.1598842-1-bvanassche@acm.org
Fixes: 5a0b0cb9bee7 ("[SCSI] ufs: Add support for sending NOP OUT UPIU")
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Bean Huo <beanhuo@micron.com>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/ufs/core/ufshcd.c

index c7b337480e3e7d0317fd4a2735c8af6ed593584b..3d367be7172866afc7cc7e6124c9a525c6e67ac6 100644 (file)
@@ -2953,37 +2953,59 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
                struct ufshcd_lrb *lrbp, int max_timeout)
 {
-       int err = 0;
-       unsigned long time_left;
+       unsigned long time_left = msecs_to_jiffies(max_timeout);
        unsigned long flags;
+       bool pending;
+       int err;
 
+retry:
        time_left = wait_for_completion_timeout(hba->dev_cmd.complete,
-                       msecs_to_jiffies(max_timeout));
+                                               time_left);
 
-       spin_lock_irqsave(hba->host->host_lock, flags);
-       hba->dev_cmd.complete = NULL;
        if (likely(time_left)) {
+               /*
+                * The completion handler called complete() and the caller of
+                * this function still owns the @lrbp tag so the code below does
+                * not trigger any race conditions.
+                */
+               hba->dev_cmd.complete = NULL;
                err = ufshcd_get_tr_ocs(lrbp);
                if (!err)
                        err = ufshcd_dev_cmd_completion(hba, lrbp);
-       }
-       spin_unlock_irqrestore(hba->host->host_lock, flags);
-
-       if (!time_left) {
+       } else {
                err = -ETIMEDOUT;
                dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
                        __func__, lrbp->task_tag);
-               if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag))
+               if (ufshcd_clear_cmds(hba, 1U << lrbp->task_tag) == 0) {
                        /* successfully cleared the command, retry if needed */
                        err = -EAGAIN;
-               /*
-                * in case of an error, after clearing the doorbell,
-                * we also need to clear the outstanding_request
-                * field in hba
-                */
-               spin_lock_irqsave(&hba->outstanding_lock, flags);
-               __clear_bit(lrbp->task_tag, &hba->outstanding_reqs);
-               spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+                       /*
+                        * Since clearing the command succeeded we also need to
+                        * clear the task tag bit from the outstanding_reqs
+                        * variable.
+                        */
+                       spin_lock_irqsave(&hba->outstanding_lock, flags);
+                       pending = test_bit(lrbp->task_tag,
+                                          &hba->outstanding_reqs);
+                       if (pending) {
+                               hba->dev_cmd.complete = NULL;
+                               __clear_bit(lrbp->task_tag,
+                                           &hba->outstanding_reqs);
+                       }
+                       spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+                       if (!pending) {
+                               /*
+                                * The completion handler ran while we tried to
+                                * clear the command.
+                                */
+                               time_left = 1;
+                               goto retry;
+                       }
+               } else {
+                       dev_err(hba->dev, "%s: failed to clear tag %d\n",
+                               __func__, lrbp->task_tag);
+               }
        }
 
        return err;