scsi: scsi_debug: Dynamically allocate sdebug_queued_cmd
authorJohn Garry <john.g.garry@oracle.com>
Mon, 27 Mar 2023 07:43:05 +0000 (07:43 +0000)
committerMartin K. Petersen <martin.petersen@oracle.com>
Mon, 3 Apr 2023 02:09:22 +0000 (22:09 -0400)
Eventually we will drop the sdebug_queue struct as it is not really
required, so start with making the sdebug_queued_cmd dynamically allocated
for the lifetime of the scsi_cmnd in the driver.

As an interim measure, make sdebug_queued_cmd.sd_dp a pointer to struct
sdebug_defer. Also keep a value of the index allocated in
sdebug_queued_cmd.qc_arr in struct sdebug_queued_cmd.

To deal with an races in accessing the scsi cmnd allocated struct
sdebug_queued_cmd, add a spinlock for the scsi command in its priv area.
Races may be between scheduling a command for completion, aborting a
command, and the command actually completing and freeing the struct
sdebug_queued_cmd.

[mkp: typo fix]

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20230327074310.1862889-7-john.g.garry@oracle.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/scsi_debug.c

index f53f3e7..1c85cbd 100644 (file)
@@ -250,6 +250,11 @@ static const char *sdebug_version_date = "20210520";
 
 #define SDEB_XA_NOT_IN_USE XA_MARK_1
 
+static struct kmem_cache *queued_cmd_cache;
+
+#define TO_QUEUED_CMD(scmd)  ((void *)(scmd)->host_scribble)
+#define ASSIGN_QUEUED_CMD(scmnd, qc) { (scmnd)->host_scribble = (void *) qc; }
+
 /* Zone types (zbcr05 table 25) */
 enum sdebug_z_type {
        ZBC_ZTYPE_CNV   = 0x1,
@@ -337,12 +342,8 @@ struct sdebug_defer {
        struct execute_work ew;
        ktime_t cmpl_ts;/* time since boot to complete this cmd */
        int sqa_idx;    /* index of sdebug_queue array */
-       int qc_idx;     /* index of sdebug_queued_cmd array within sqa_idx */
        int hc_idx;     /* hostwide tag index */
        int issuing_cpu;
-       bool init_hrt;
-       bool init_wq;
-       bool init_poll;
        bool aborted;   /* true when blk_abort_request() already called */
        enum sdeb_defer_type defer_t;
 };
@@ -351,12 +352,16 @@ struct sdebug_queued_cmd {
        /* corresponding bit set in in_use_bm[] in owning struct sdebug_queue
         * instance indicates this slot is in use.
         */
-       struct sdebug_defer *sd_dp;
-       struct scsi_cmnd *a_cmnd;
+       struct sdebug_defer sd_dp;
+       struct scsi_cmnd *scmd;
+};
+
+struct sdebug_scsi_cmd {
+       spinlock_t   lock;
 };
 
 struct sdebug_queue {
-       struct sdebug_queued_cmd qc_arr[SDEBUG_CANQUEUE];
+       struct sdebug_queued_cmd *qc_arr[SDEBUG_CANQUEUE];
        unsigned long in_use_bm[SDEBUG_CANQUEUE_WORDS];
        spinlock_t qc_lock;
 };
@@ -508,6 +513,8 @@ static int sdebug_add_store(void);
 static void sdebug_erase_store(int idx, struct sdeb_store_info *sip);
 static void sdebug_erase_all_stores(bool apart_from_first);
 
+static void sdebug_free_queued_cmd(struct sdebug_queued_cmd *sqcp);
+
 /*
  * The following are overflow arrays for cdbs that "hit" the same index in
  * the opcode_info_arr array. The most time sensitive (or commonly used) cdb
@@ -4919,46 +4926,48 @@ static u32 get_tag(struct scsi_cmnd *cmnd)
 /* Queued (deferred) command completions converge here. */
 static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
 {
-       bool aborted = sd_dp->aborted;
+       struct sdebug_queued_cmd *sqcp = container_of(sd_dp, struct sdebug_queued_cmd, sd_dp);
        int qc_idx;
        int retiring = 0;
-       unsigned long iflags;
+       unsigned long flags, iflags;
+       struct scsi_cmnd *scp = sqcp->scmd;
+       struct sdebug_scsi_cmd *sdsc;
+       bool aborted;
        struct sdebug_queue *sqp;
-       struct sdebug_queued_cmd *sqcp;
-       struct scsi_cmnd *scp;
 
-       if (unlikely(aborted))
-               sd_dp->aborted = false;
-       qc_idx = sd_dp->qc_idx;
-       sqp = sdebug_q_arr + sd_dp->sqa_idx;
+       qc_idx = sd_dp->sqa_idx;
        if (sdebug_statistics) {
                atomic_inc(&sdebug_completions);
                if (raw_smp_processor_id() != sd_dp->issuing_cpu)
                        atomic_inc(&sdebug_miss_cpus);
        }
+       if (!scp) {
+               pr_err("scmd=NULL\n");
+               goto out;
+       }
        if (unlikely((qc_idx < 0) || (qc_idx >= SDEBUG_CANQUEUE))) {
                pr_err("wild qc_idx=%d\n", qc_idx);
-               return;
+               goto out;
        }
+
+       sdsc = scsi_cmd_priv(scp);
+       sqp = get_queue(scp);
        spin_lock_irqsave(&sqp->qc_lock, iflags);
-       WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_NONE);
-       sqcp = &sqp->qc_arr[qc_idx];
-       scp = sqcp->a_cmnd;
-       if (unlikely(scp == NULL)) {
-               spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-               pr_err("scp is NULL, sqa_idx=%d, qc_idx=%d, hc_idx=%d\n",
-                      sd_dp->sqa_idx, qc_idx, sd_dp->hc_idx);
-               return;
-       }
+       spin_lock_irqsave(&sdsc->lock, flags);
+       aborted = sd_dp->aborted;
+       if (unlikely(aborted))
+               sd_dp->aborted = false;
+       ASSIGN_QUEUED_CMD(scp, NULL);
 
        if (unlikely(atomic_read(&retired_max_queue) > 0))
                retiring = 1;
 
-       sqcp->a_cmnd = NULL;
+       sqp->qc_arr[qc_idx] = NULL;
        if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
+               spin_unlock_irqrestore(&sdsc->lock, flags);
                spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-               pr_err("Unexpected completion\n");
-               return;
+               pr_err("Unexpected completion qc_idx=%d\n", qc_idx);
+               goto out;
        }
 
        if (unlikely(retiring)) {       /* user has reduced max_queue */
@@ -4966,9 +4975,10 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
 
                retval = atomic_read(&retired_max_queue);
                if (qc_idx >= retval) {
+                       spin_unlock_irqrestore(&sdsc->lock, flags);
                        spin_unlock_irqrestore(&sqp->qc_lock, iflags);
                        pr_err("index %d too large\n", retval);
-                       return;
+                       goto out;
                }
                k = find_last_bit(sqp->in_use_bm, retval);
                if ((k < sdebug_max_queue) || (k == retval))
@@ -4976,14 +4986,19 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
                else
                        atomic_set(&retired_max_queue, k + 1);
        }
+
+       spin_unlock_irqrestore(&sdsc->lock, flags);
        spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-       if (unlikely(aborted)) {
-               if (sdebug_verbose)
-                       pr_info("bypassing scsi_done() due to aborted cmd, kicking-off EH\n");
+
+       if (aborted) {
+               pr_info("bypassing scsi_done() due to aborted cmd, kicking-off EH\n");
                blk_abort_request(scsi_cmd_to_rq(scp));
-               return;
+               goto out;
        }
+
        scsi_done(scp); /* callback to mid level */
+out:
+       sdebug_free_queued_cmd(sqcp);
 }
 
 /* When high resolution timer goes off this function is called. */
@@ -5233,115 +5248,126 @@ static void scsi_debug_slave_destroy(struct scsi_device *sdp)
        }
 }
 
-static void stop_qc_helper(struct sdebug_defer *sd_dp,
+/* Returns true if we require the queued memory to be freed by the caller. */
+static bool stop_qc_helper(struct sdebug_defer *sd_dp,
                           enum sdeb_defer_type defer_t)
 {
-       if (!sd_dp)
-               return;
-       if (defer_t == SDEB_DEFER_HRT)
-               hrtimer_cancel(&sd_dp->hrt);
-       else if (defer_t == SDEB_DEFER_WQ)
-               cancel_work_sync(&sd_dp->ew.work);
+       if (defer_t == SDEB_DEFER_HRT) {
+               int res = hrtimer_try_to_cancel(&sd_dp->hrt);
+
+               switch (res) {
+               case 0: /* Not active, it must have already run */
+               case -1: /* -1 It's executing the CB */
+                       return false;
+               case 1: /* Was active, we've now cancelled */
+               default:
+                       return true;
+               }
+       } else if (defer_t == SDEB_DEFER_WQ) {
+               /* Cancel if pending */
+               if (cancel_work_sync(&sd_dp->ew.work))
+                       return true;
+               /* Was not pending, so it must have run */
+               return false;
+       } else if (defer_t == SDEB_DEFER_POLL) {
+               return true;
+       }
+
+       return false;
 }
 
-/* If @cmnd found deletes its timer or work queue and returns true; else
-   returns false */
-static bool stop_queued_cmnd(struct scsi_cmnd *cmnd)
+
+static bool scsi_debug_stop_cmnd(struct scsi_cmnd *cmnd, int *sqa_idx)
 {
-       unsigned long iflags;
-       int j, k, qmax, r_qmax;
        enum sdeb_defer_type l_defer_t;
-       struct sdebug_queue *sqp;
        struct sdebug_queued_cmd *sqcp;
        struct sdebug_defer *sd_dp;
+       struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
 
-       for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
+       lockdep_assert_held(&sdsc->lock);
+
+       sqcp = TO_QUEUED_CMD(cmnd);
+       if (!sqcp)
+               return false;
+       sd_dp = &sqcp->sd_dp;
+       if (sqa_idx)
+               *sqa_idx = sd_dp->sqa_idx;
+       l_defer_t = READ_ONCE(sd_dp->defer_t);
+       ASSIGN_QUEUED_CMD(cmnd, NULL);
+
+       if (stop_qc_helper(sd_dp, l_defer_t))
+               sdebug_free_queued_cmd(sqcp);
+
+       return true;
+}
+
+/*
+ * Called from scsi_debug_abort() only, which is for timed-out cmd.
+ */
+static bool scsi_debug_abort_cmnd(struct scsi_cmnd *cmnd)
+{
+       struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
+       struct sdebug_queue *sqp = get_queue(cmnd);
+       unsigned long flags, iflags;
+       int k = -1;
+       bool res;
+
+       spin_lock_irqsave(&sdsc->lock, flags);
+       res = scsi_debug_stop_cmnd(cmnd, &k);
+       spin_unlock_irqrestore(&sdsc->lock, flags);
+
+       if (k >= 0) {
                spin_lock_irqsave(&sqp->qc_lock, iflags);
-               qmax = sdebug_max_queue;
-               r_qmax = atomic_read(&retired_max_queue);
-               if (r_qmax > qmax)
-                       qmax = r_qmax;
-               for (k = 0; k < qmax; ++k) {
-                       if (test_bit(k, sqp->in_use_bm)) {
-                               sqcp = &sqp->qc_arr[k];
-                               if (cmnd != sqcp->a_cmnd)
-                                       continue;
-                               /* found */
-                               sqcp->a_cmnd = NULL;
-                               sd_dp = sqcp->sd_dp;
-                               if (sd_dp) {
-                                       l_defer_t = READ_ONCE(sd_dp->defer_t);
-                                       WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_NONE);
-                               } else
-                                       l_defer_t = SDEB_DEFER_NONE;
-                               spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-                               stop_qc_helper(sd_dp, l_defer_t);
-                               clear_bit(k, sqp->in_use_bm);
-                               return true;
-                       }
-               }
+               clear_bit(k, sqp->in_use_bm);
+               sqp->qc_arr[k] = NULL;
                spin_unlock_irqrestore(&sqp->qc_lock, iflags);
        }
-       return false;
+
+       return res;
 }
 
 /* Deletes (stops) timers or work queues of all queued commands */
 static void stop_all_queued(void)
 {
-       unsigned long iflags;
+       unsigned long iflags, flags;
        int j, k;
-       enum sdeb_defer_type l_defer_t;
        struct sdebug_queue *sqp;
-       struct sdebug_queued_cmd *sqcp;
-       struct sdebug_defer *sd_dp;
 
        for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
                spin_lock_irqsave(&sqp->qc_lock, iflags);
                for (k = 0; k < SDEBUG_CANQUEUE; ++k) {
                        if (test_bit(k, sqp->in_use_bm)) {
-                               sqcp = &sqp->qc_arr[k];
-                               if (sqcp->a_cmnd == NULL)
+                               struct sdebug_queued_cmd *sqcp = sqp->qc_arr[k];
+                               struct sdebug_scsi_cmd *sdsc;
+                               struct scsi_cmnd *scmd;
+
+                               if (!sqcp)
                                        continue;
-                               sqcp->a_cmnd = NULL;
-                               sd_dp = sqcp->sd_dp;
-                               if (sd_dp) {
-                                       l_defer_t = READ_ONCE(sd_dp->defer_t);
-                                       WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_NONE);
-                               } else
-                                       l_defer_t = SDEB_DEFER_NONE;
-                               spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-                               stop_qc_helper(sd_dp, l_defer_t);
+                               scmd = sqcp->scmd;
+                               if (!scmd)
+                                       continue;
+                               sdsc = scsi_cmd_priv(scmd);
+                               spin_lock_irqsave(&sdsc->lock, flags);
+                               if (TO_QUEUED_CMD(scmd) != sqcp) {
+                                       spin_unlock_irqrestore(&sdsc->lock, flags);
+                                       continue;
+                               }
+                               scsi_debug_stop_cmnd(scmd, NULL);
+                               spin_unlock_irqrestore(&sdsc->lock, flags);
+                               sqp->qc_arr[k] = NULL;
                                clear_bit(k, sqp->in_use_bm);
-                               spin_lock_irqsave(&sqp->qc_lock, iflags);
                        }
                }
                spin_unlock_irqrestore(&sqp->qc_lock, iflags);
        }
 }
 
-/* Free queued command memory on heap */
-static void free_all_queued(void)
-{
-       int j, k;
-       struct sdebug_queue *sqp;
-       struct sdebug_queued_cmd *sqcp;
-
-       for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
-               for (k = 0; k < SDEBUG_CANQUEUE; ++k) {
-                       sqcp = &sqp->qc_arr[k];
-                       kfree(sqcp->sd_dp);
-                       sqcp->sd_dp = NULL;
-               }
-       }
-}
-
 static int scsi_debug_abort(struct scsi_cmnd *SCpnt)
 {
-       bool ok;
+       bool ok = scsi_debug_abort_cmnd(SCpnt);
 
        ++num_aborts;
 
-       ok = stop_queued_cmnd(SCpnt);
        if (SDEBUG_OPT_ALL_NOISE & sdebug_opts)
                sdev_printk(KERN_INFO, SCpnt->device,
                            "%s: command%s found\n", __func__,
@@ -5543,6 +5569,34 @@ static bool inject_on_this_cmd(void)
 
 #define INCLUSIVE_TIMING_MAX_NS 1000000                /* 1 millisecond */
 
+
+void sdebug_free_queued_cmd(struct sdebug_queued_cmd *sqcp)
+{
+       if (sqcp)
+               kmem_cache_free(queued_cmd_cache, sqcp);
+}
+
+static struct sdebug_queued_cmd *sdebug_alloc_queued_cmd(struct scsi_cmnd *scmd)
+{
+       struct sdebug_queued_cmd *sqcp;
+       struct sdebug_defer *sd_dp;
+
+       sqcp = kmem_cache_zalloc(queued_cmd_cache, GFP_ATOMIC);
+       if (!sqcp)
+               return NULL;
+
+       sd_dp = &sqcp->sd_dp;
+
+       hrtimer_init(&sd_dp->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+       sd_dp->hrt.function = sdebug_q_cmd_hrt_complete;
+       INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete);
+
+       sqcp->scmd = scmd;
+       sd_dp->sqa_idx = -1;
+
+       return sqcp;
+}
+
 /* Complete the processing of the thread that queued a SCSI command to this
  * driver. It either completes the command by calling cmnd_done() or
  * schedules a hr timer or work queue then returns 0. Returns
@@ -5554,15 +5608,16 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
                                    struct sdebug_dev_info *),
                         int delta_jiff, int ndelay)
 {
-       bool new_sd_dp;
-       bool polled = scsi_cmd_to_rq(cmnd)->cmd_flags & REQ_POLLED;
-       int k;
-       unsigned long iflags;
+       struct request *rq = scsi_cmd_to_rq(cmnd);
+       bool polled = rq->cmd_flags & REQ_POLLED;
+       struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmnd);
+       unsigned long iflags, flags;
        u64 ns_from_boot = 0;
        struct sdebug_queue *sqp;
        struct sdebug_queued_cmd *sqcp;
        struct scsi_device *sdp;
        struct sdebug_defer *sd_dp;
+       int k;
 
        if (unlikely(devip == NULL)) {
                if (scsi_result == 0)
@@ -5606,22 +5661,17 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
                goto respond_in_thread;
        }
        set_bit(k, sqp->in_use_bm);
-       sqcp = &sqp->qc_arr[k];
-       sqcp->a_cmnd = cmnd;
-       cmnd->host_scribble = (unsigned char *)sqcp;
-       sd_dp = sqcp->sd_dp;
-       spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 
-       if (!sd_dp) {
-               sd_dp = kzalloc(sizeof(*sd_dp), GFP_ATOMIC);
-               if (!sd_dp) {
-                       clear_bit(k, sqp->in_use_bm);
-                       return SCSI_MLQUEUE_HOST_BUSY;
-               }
-               new_sd_dp = true;
-       } else {
-               new_sd_dp = false;
+       sqcp = sdebug_alloc_queued_cmd(cmnd);
+       if (!sqcp) {
+               clear_bit(k, sqp->in_use_bm);
+               spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+               return SCSI_MLQUEUE_HOST_BUSY;
        }
+       sd_dp = &sqcp->sd_dp;
+       sd_dp->sqa_idx = k;
+       sqp->qc_arr[k] = sqcp;
+       spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 
        /* Set the hostwide tag */
        if (sdebug_host_max_queue)
@@ -5673,12 +5723,11 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 
                                if (kt <= d) {  /* elapsed duration >= kt */
                                        spin_lock_irqsave(&sqp->qc_lock, iflags);
-                                       sqcp->a_cmnd = NULL;
+                                       sqp->qc_arr[k] = NULL;
                                        clear_bit(k, sqp->in_use_bm);
                                        spin_unlock_irqrestore(&sqp->qc_lock, iflags);
-                                       if (new_sd_dp)
-                                               kfree(sd_dp);
                                        /* call scsi_done() from this thread */
+                                       sdebug_free_queued_cmd(sqcp);
                                        scsi_done(cmnd);
                                        return 0;
                                }
@@ -5686,33 +5735,28 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
                                kt -= d;
                        }
                }
+               if (sdebug_statistics)
+                       sd_dp->issuing_cpu = raw_smp_processor_id();
                if (polled) {
+                       spin_lock_irqsave(&sdsc->lock, flags);
                        sd_dp->cmpl_ts = ktime_add(ns_to_ktime(ns_from_boot), kt);
-                       spin_lock_irqsave(&sqp->qc_lock, iflags);
-                       if (!sd_dp->init_poll) {
-                               sd_dp->init_poll = true;
-                               sqcp->sd_dp = sd_dp;
-                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
-                               sd_dp->qc_idx = k;
-                       }
+                       ASSIGN_QUEUED_CMD(cmnd, sqcp);
                        WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_POLL);
-                       spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+                       spin_unlock_irqrestore(&sdsc->lock, flags);
                } else {
-                       if (!sd_dp->init_hrt) {
-                               sd_dp->init_hrt = true;
-                               sqcp->sd_dp = sd_dp;
-                               hrtimer_init(&sd_dp->hrt, CLOCK_MONOTONIC,
-                                            HRTIMER_MODE_REL_PINNED);
-                               sd_dp->hrt.function = sdebug_q_cmd_hrt_complete;
-                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
-                               sd_dp->qc_idx = k;
-                       }
-                       WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_HRT);
                        /* schedule the invocation of scsi_done() for a later time */
+                       spin_lock_irqsave(&sdsc->lock, flags);
+                       ASSIGN_QUEUED_CMD(cmnd, sqcp);
+                       WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_HRT);
                        hrtimer_start(&sd_dp->hrt, kt, HRTIMER_MODE_REL_PINNED);
+                       /*
+                        * The completion handler will try to grab sqcp->lock,
+                        * so there is no chance that the completion handler
+                        * will call scsi_done() until we release the lock
+                        * here (so ok to keep referencing sdsc).
+                        */
+                       spin_unlock_irqrestore(&sdsc->lock, flags);
                }
-               if (sdebug_statistics)
-                       sd_dp->issuing_cpu = raw_smp_processor_id();
        } else {        /* jdelay < 0, use work queue */
                if (unlikely((sdebug_opts & SDEBUG_OPT_CMD_ABORT) &&
                             atomic_read(&sdeb_inject_pending))) {
@@ -5722,30 +5766,21 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
                                    blk_mq_unique_tag_to_tag(get_tag(cmnd)));
                }
 
+               if (sdebug_statistics)
+                       sd_dp->issuing_cpu = raw_smp_processor_id();
                if (polled) {
+                       spin_lock_irqsave(&sdsc->lock, flags);
+                       ASSIGN_QUEUED_CMD(cmnd, sqcp);
                        sd_dp->cmpl_ts = ns_to_ktime(ns_from_boot);
-                       spin_lock_irqsave(&sqp->qc_lock, iflags);
-                       if (!sd_dp->init_poll) {
-                               sd_dp->init_poll = true;
-                               sqcp->sd_dp = sd_dp;
-                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
-                               sd_dp->qc_idx = k;
-                       }
                        WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_POLL);
-                       spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+                       spin_unlock_irqrestore(&sdsc->lock, flags);
                } else {
-                       if (!sd_dp->init_wq) {
-                               sd_dp->init_wq = true;
-                               sqcp->sd_dp = sd_dp;
-                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
-                               sd_dp->qc_idx = k;
-                               INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete);
-                       }
+                       spin_lock_irqsave(&sdsc->lock, flags);
+                       ASSIGN_QUEUED_CMD(cmnd, sqcp);
                        WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_WQ);
                        schedule_work(&sd_dp->ew.work);
+                       spin_unlock_irqrestore(&sdsc->lock, flags);
                }
-               if (sdebug_statistics)
-                       sd_dp->issuing_cpu = raw_smp_processor_id();
        }
 
        return 0;
@@ -7066,6 +7101,10 @@ static int __init scsi_debug_init(void)
        hosts_to_add = sdebug_add_host;
        sdebug_add_host = 0;
 
+       queued_cmd_cache = KMEM_CACHE(sdebug_queued_cmd, SLAB_HWCACHE_ALIGN);
+       if (!queued_cmd_cache)
+               goto driver_unreg;
+
        for (k = 0; k < hosts_to_add; k++) {
                if (want_store && k == 0) {
                        ret = sdebug_add_host_helper(idx);
@@ -7088,6 +7127,8 @@ static int __init scsi_debug_init(void)
 
        return 0;
 
+driver_unreg:
+       driver_unregister(&sdebug_driverfs_driver);
 bus_unreg:
        bus_unregister(&pseudo_lld_bus);
 dev_unreg:
@@ -7103,10 +7144,9 @@ static void __exit scsi_debug_exit(void)
 {
        int k = sdebug_num_hosts;
 
-       stop_all_queued();
        for (; k; k--)
                sdebug_do_remove_host(true);
-       free_all_queued();
+       kmem_cache_destroy(queued_cmd_cache);
        driver_unregister(&sdebug_driverfs_driver);
        bus_unregister(&pseudo_lld_bus);
        root_device_unregister(pseudo_primary);
@@ -7493,6 +7533,8 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
                goto unlock;
 
        for (first = true; first || qc_idx + 1 < sdebug_max_queue; )   {
+               unsigned long flags;
+               struct sdebug_scsi_cmd *sdsc;
                if (first) {
                        first = false;
                        if (!test_bit(qc_idx, sqp->in_use_bm))
@@ -7503,37 +7545,60 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
                if (qc_idx >= sdebug_max_queue)
                        break;
 
-               sqcp = &sqp->qc_arr[qc_idx];
-               sd_dp = sqcp->sd_dp;
-               if (unlikely(!sd_dp))
-                       continue;
-               scp = sqcp->a_cmnd;
+               sqcp = sqp->qc_arr[qc_idx];
+               if (!sqcp) {
+                       pr_err("sqcp is NULL, queue_num=%d, qc_idx=%u from %s\n",
+                              queue_num, qc_idx, __func__);
+                       break;
+               }
+               sd_dp = &sqcp->sd_dp;
+
+               scp = sqcp->scmd;
                if (unlikely(scp == NULL)) {
                        pr_err("scp is NULL, queue_num=%d, qc_idx=%u from %s\n",
                               queue_num, qc_idx, __func__);
                        break;
                }
+               sdsc = scsi_cmd_priv(scp);
+               spin_lock_irqsave(&sdsc->lock, flags);
                if (READ_ONCE(sd_dp->defer_t) == SDEB_DEFER_POLL) {
-                       if (kt_from_boot < sd_dp->cmpl_ts)
+                       struct sdebug_queued_cmd *_sqcp = TO_QUEUED_CMD(scp);
+
+                       if (_sqcp != sqcp) {
+                               pr_err("inconsistent queued cmd tag=%#x\n",
+                                      blk_mq_unique_tag(scsi_cmd_to_rq(scp)));
+                               spin_unlock_irqrestore(&sdsc->lock, flags);
                                continue;
+                       }
+
+                       if (kt_from_boot < sd_dp->cmpl_ts) {
+                               spin_unlock_irqrestore(&sdsc->lock, flags);
+                               continue;
+                       }
 
-               } else          /* ignoring non REQ_POLLED requests */
+               } else          /* ignoring non REQ_POLLED requests */ {
+                       spin_unlock_irqrestore(&sdsc->lock, flags);
                        continue;
+               }
                if (unlikely(atomic_read(&retired_max_queue) > 0))
                        retiring = true;
 
-               sqcp->a_cmnd = NULL;
                if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
+                       spin_unlock_irqrestore(&sdsc->lock, flags);
                        pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%u from %s\n",
                                sqp, queue_num, qc_idx, __func__);
+                       sdebug_free_queued_cmd(sqcp);
                        break;
                }
+               sqp->qc_arr[qc_idx] = NULL;
                if (unlikely(retiring)) {       /* user has reduced max_queue */
                        int k, retval;
 
                        retval = atomic_read(&retired_max_queue);
                        if (qc_idx >= retval) {
                                pr_err("index %d too large\n", retval);
+                               spin_unlock_irqrestore(&sdsc->lock, flags);
+                               sdebug_free_queued_cmd(sqcp);
                                break;
                        }
                        k = find_last_bit(sqp->in_use_bm, retval);
@@ -7542,7 +7607,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
                        else
                                atomic_set(&retired_max_queue, k + 1);
                }
-               WRITE_ONCE(sd_dp->defer_t, SDEB_DEFER_NONE);
+               spin_unlock_irqrestore(&sdsc->lock, flags);
                spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 
                if (sdebug_statistics) {
@@ -7551,6 +7616,8 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
                                atomic_inc(&sdebug_miss_cpus);
                }
 
+               sdebug_free_queued_cmd(sqcp);
+
                scsi_done(scp); /* callback to mid level */
                num_entries++;
                spin_lock_irqsave(&sqp->qc_lock, iflags);
@@ -7733,6 +7800,16 @@ err_out:
        return schedule_resp(scp, NULL, DID_NO_CONNECT << 16, NULL, 0, 0);
 }
 
+static int sdebug_init_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
+{
+       struct sdebug_scsi_cmd *sdsc = scsi_cmd_priv(cmd);
+
+       spin_lock_init(&sdsc->lock);
+
+       return 0;
+}
+
+
 static struct scsi_host_template sdebug_driver_template = {
        .show_info =            scsi_debug_show_info,
        .write_info =           scsi_debug_write_info,
@@ -7760,6 +7837,8 @@ static struct scsi_host_template sdebug_driver_template = {
        .max_segment_size =     -1U,
        .module =               THIS_MODULE,
        .track_queue_depth =    1,
+       .cmd_size = sizeof(struct sdebug_scsi_cmd),
+       .init_cmd_priv = sdebug_init_cmd_priv,
 };
 
 static int sdebug_driver_probe(struct device *dev)