scsi: scsi_debug: Add new defer type for mq_poll
authorDouglas Gilbert <dgilbert@interlog.com>
Mon, 15 Feb 2021 07:40:47 +0000 (13:10 +0530)
committerMartin K. Petersen <martin.petersen@oracle.com>
Thu, 4 Mar 2021 22:37:03 +0000 (17:37 -0500)
Add a new sdeb_defer_type enumeration: SDEB_DEFER_POLL for requests that
have REQ_HIPRI set in cmd_flags field. It is expected that these requests
will be polled via the mq_poll entry point which is driven by calls to
blk_poll() in the block layer. Therefore timer events are not 'wired up' in
the normal fashion.

There are still cases with short delays (e.g. < 10 microseconds) where by
the time the command response processing occurs, the delay is already
exceeded in which case the code calls scsi_done() directly. In such cases
there is no window for mq_poll() to be called.

Add 'mq_polls' counter that increments on each scsi_done() called via the
mq_poll entry point. Can be used to show (with 'cat
/proc/scsi/scsi_debug/<host_id>') that blk_poll() is causing completions
rather than some other mechanism.

Link: https://lore.kernel.org/r/20210215074048.19424-5-kashyap.desai@broadcom.com
Tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/scsi_debug.c

index 5ea09fe..2ef77b9 100644 (file)
@@ -322,17 +322,19 @@ struct sdeb_store_info {
        container_of(d, struct sdebug_host_info, dev)
 
 enum sdeb_defer_type {SDEB_DEFER_NONE = 0, SDEB_DEFER_HRT = 1,
-                     SDEB_DEFER_WQ = 2};
+                     SDEB_DEFER_WQ = 2, SDEB_DEFER_POLL = 3};
 
 struct sdebug_defer {
        struct hrtimer hrt;
        struct execute_work ew;
+       ktime_t cmpl_ts;/* time since boot to complete this cmd */
        int sqa_idx;    /* index of sdebug_queue array */
        int qc_idx;     /* index of sdebug_queued_cmd array within sqa_idx */
        int hc_idx;     /* hostwide tag index */
        int issuing_cpu;
        bool init_hrt;
        bool init_wq;
+       bool init_poll;
        bool aborted;   /* true when blk_abort_request() already called */
        enum sdeb_defer_type defer_t;
 };
@@ -357,6 +359,7 @@ static atomic_t sdebug_completions;  /* count of deferred completions */
 static atomic_t sdebug_miss_cpus;    /* submission + completion cpus differ */
 static atomic_t sdebug_a_tsf;       /* 'almost task set full' counter */
 static atomic_t sdeb_inject_pending;
+static atomic_t sdeb_mq_poll_count;  /* bumped when mq_poll returns > 0 */
 
 struct opcode_info_t {
        u8 num_attached;        /* 0 if this is it (i.e. a leaf); use 0xff */
@@ -4730,7 +4733,6 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
        struct scsi_cmnd *scp;
        struct sdebug_dev_info *devip;
 
-       sd_dp->defer_t = SDEB_DEFER_NONE;
        if (unlikely(aborted))
                sd_dp->aborted = false;
        qc_idx = sd_dp->qc_idx;
@@ -4745,6 +4747,7 @@ static void sdebug_q_cmd_complete(struct sdebug_defer *sd_dp)
                return;
        }
        spin_lock_irqsave(&sqp->qc_lock, iflags);
+       sd_dp->defer_t = SDEB_DEFER_NONE;
        sqcp = &sqp->qc_arr[qc_idx];
        scp = sqcp->a_cmnd;
        if (unlikely(scp == NULL)) {
@@ -5434,13 +5437,6 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
        sd_dp = sqcp->sd_dp;
        spin_unlock_irqrestore(&sqp->qc_lock, iflags);
 
-       /* Do not complete IO from default completion path.
-        * Let it to be on queue.
-        * Completion should happen from mq_poll interface.
-        */
-       if ((sqp - sdebug_q_arr) >= (submit_queues - poll_queues))
-               return 0;
-
        if (!sd_dp) {
                sd_dp = kzalloc(sizeof(*sd_dp), GFP_ATOMIC);
                if (!sd_dp) {
@@ -5517,40 +5513,66 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
                                kt -= d;
                        }
                }
-               if (!sd_dp->init_hrt) {
-                       sd_dp->init_hrt = true;
-                       sqcp->sd_dp = sd_dp;
-                       hrtimer_init(&sd_dp->hrt, CLOCK_MONOTONIC,
-                                    HRTIMER_MODE_REL_PINNED);
-                       sd_dp->hrt.function = sdebug_q_cmd_hrt_complete;
-                       sd_dp->sqa_idx = sqp - sdebug_q_arr;
-                       sd_dp->qc_idx = k;
+               sd_dp->cmpl_ts = ktime_add(ns_to_ktime(ns_from_boot), kt);
+               if (cmnd->request->cmd_flags & REQ_HIPRI) {
+                       spin_lock_irqsave(&sqp->qc_lock, iflags);
+                       if (!sd_dp->init_poll) {
+                               sd_dp->init_poll = true;
+                               sqcp->sd_dp = sd_dp;
+                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
+                               sd_dp->qc_idx = k;
+                       }
+                       sd_dp->defer_t = SDEB_DEFER_POLL;
+                       spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+               } else {
+                       if (!sd_dp->init_hrt) {
+                               sd_dp->init_hrt = true;
+                               sqcp->sd_dp = sd_dp;
+                               hrtimer_init(&sd_dp->hrt, CLOCK_MONOTONIC,
+                                            HRTIMER_MODE_REL_PINNED);
+                               sd_dp->hrt.function = sdebug_q_cmd_hrt_complete;
+                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
+                               sd_dp->qc_idx = k;
+                       }
+                       sd_dp->defer_t = SDEB_DEFER_HRT;
+                       /* schedule the invocation of scsi_done() for a later time */
+                       hrtimer_start(&sd_dp->hrt, kt, HRTIMER_MODE_REL_PINNED);
                }
                if (sdebug_statistics)
                        sd_dp->issuing_cpu = raw_smp_processor_id();
-               sd_dp->defer_t = SDEB_DEFER_HRT;
-               /* schedule the invocation of scsi_done() for a later time */
-               hrtimer_start(&sd_dp->hrt, kt, HRTIMER_MODE_REL_PINNED);
        } else {        /* jdelay < 0, use work queue */
-               if (!sd_dp->init_wq) {
-                       sd_dp->init_wq = true;
-                       sqcp->sd_dp = sd_dp;
-                       sd_dp->sqa_idx = sqp - sdebug_q_arr;
-                       sd_dp->qc_idx = k;
-                       INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete);
-               }
-               if (sdebug_statistics)
-                       sd_dp->issuing_cpu = raw_smp_processor_id();
-               sd_dp->defer_t = SDEB_DEFER_WQ;
                if (unlikely((sdebug_opts & SDEBUG_OPT_CMD_ABORT) &&
                             atomic_read(&sdeb_inject_pending)))
                        sd_dp->aborted = true;
-               schedule_work(&sd_dp->ew.work);
-               if (unlikely((sdebug_opts & SDEBUG_OPT_CMD_ABORT) &&
-                            atomic_read(&sdeb_inject_pending))) {
+               sd_dp->cmpl_ts = ns_to_ktime(ns_from_boot);
+               if (cmnd->request->cmd_flags & REQ_HIPRI) {
+                       spin_lock_irqsave(&sqp->qc_lock, iflags);
+                       if (!sd_dp->init_poll) {
+                               sd_dp->init_poll = true;
+                               sqcp->sd_dp = sd_dp;
+                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
+                               sd_dp->qc_idx = k;
+                       }
+                       sd_dp->defer_t = SDEB_DEFER_POLL;
+                       spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+               } else {
+                       if (!sd_dp->init_wq) {
+                               sd_dp->init_wq = true;
+                               sqcp->sd_dp = sd_dp;
+                               sd_dp->sqa_idx = sqp - sdebug_q_arr;
+                               sd_dp->qc_idx = k;
+                               INIT_WORK(&sd_dp->ew.work, sdebug_q_cmd_wq_complete);
+                       }
+                       sd_dp->defer_t = SDEB_DEFER_WQ;
+                       schedule_work(&sd_dp->ew.work);
+               }
+               if (sdebug_statistics)
+                       sd_dp->issuing_cpu = raw_smp_processor_id();
+               if (unlikely(sd_dp->aborted)) {
                        sdev_printk(KERN_INFO, sdp, "abort request tag %d\n", cmnd->request->tag);
                        blk_abort_request(cmnd->request);
                        atomic_set(&sdeb_inject_pending, 0);
+                       sd_dp->aborted = false;
                }
        }
        if (unlikely((SDEBUG_OPT_Q_NOISE & sdebug_opts) && scsi_result == device_qfull_result))
@@ -5779,11 +5801,12 @@ static int scsi_debug_show_info(struct seq_file *m, struct Scsi_Host *host)
                   dix_reads, dix_writes, dif_errors);
        seq_printf(m, "usec_in_jiffy=%lu, statistics=%d\n", TICK_NSEC / 1000,
                   sdebug_statistics);
-       seq_printf(m, "cmnd_count=%d, completions=%d, %s=%d, a_tsf=%d\n",
+       seq_printf(m, "cmnd_count=%d, completions=%d, %s=%d, a_tsf=%d, mq_polls=%d\n",
                   atomic_read(&sdebug_cmnd_count),
                   atomic_read(&sdebug_completions),
                   "miss_cpus", atomic_read(&sdebug_miss_cpus),
-                  atomic_read(&sdebug_a_tsf));
+                  atomic_read(&sdebug_a_tsf),
+                  atomic_read(&sdeb_mq_poll_count));
 
        seq_printf(m, "submit_queues=%d\n", submit_queues);
        for (j = 0, sqp = sdebug_q_arr; j < submit_queues; ++j, ++sqp) {
@@ -7247,52 +7270,68 @@ static int sdebug_map_queues(struct Scsi_Host *shost)
 
 static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
 {
-       int qc_idx;
-       int retiring = 0;
+       bool first;
+       bool retiring = false;
+       int num_entries = 0;
+       unsigned int qc_idx = 0;
        unsigned long iflags;
+       ktime_t kt_from_boot = ktime_get_boottime();
        struct sdebug_queue *sqp;
        struct sdebug_queued_cmd *sqcp;
        struct scsi_cmnd *scp;
        struct sdebug_dev_info *devip;
-       int num_entries = 0;
+       struct sdebug_defer *sd_dp;
 
        sqp = sdebug_q_arr + queue_num;
+       spin_lock_irqsave(&sqp->qc_lock, iflags);
 
-       do {
-               spin_lock_irqsave(&sqp->qc_lock, iflags);
-               qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
-               if (unlikely((qc_idx < 0) || (qc_idx >= sdebug_max_queue)))
-                       goto out;
+       for (first = true; first || qc_idx + 1 < sdebug_max_queue; )   {
+               if (first) {
+                       qc_idx = find_first_bit(sqp->in_use_bm, sdebug_max_queue);
+                       first = false;
+               } else {
+                       qc_idx = find_next_bit(sqp->in_use_bm, sdebug_max_queue, qc_idx + 1);
+               }
+               if (unlikely(qc_idx >= sdebug_max_queue))
+                       break;
 
                sqcp = &sqp->qc_arr[qc_idx];
+               sd_dp = sqcp->sd_dp;
+               if (unlikely(!sd_dp))
+                       continue;
                scp = sqcp->a_cmnd;
                if (unlikely(scp == NULL)) {
-                       pr_err("scp is NULL, queue_num=%d, qc_idx=%d from %s\n",
+                       pr_err("scp is NULL, queue_num=%d, qc_idx=%u from %s\n",
                               queue_num, qc_idx, __func__);
-                       goto out;
+                       break;
                }
+               if (sd_dp->defer_t == SDEB_DEFER_POLL) {
+                       if (kt_from_boot < sd_dp->cmpl_ts)
+                               continue;
+
+               } else          /* ignoring non REQ_HIPRI requests */
+                       continue;
                devip = (struct sdebug_dev_info *)scp->device->hostdata;
                if (likely(devip))
                        atomic_dec(&devip->num_in_q);
                else
                        pr_err("devip=NULL from %s\n", __func__);
                if (unlikely(atomic_read(&retired_max_queue) > 0))
-                       retiring = 1;
+                       retiring = true;
 
                sqcp->a_cmnd = NULL;
                if (unlikely(!test_and_clear_bit(qc_idx, sqp->in_use_bm))) {
-                       pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%d from %s\n",
+                       pr_err("Unexpected completion sqp %p queue_num=%d qc_idx=%u from %s\n",
                                sqp, queue_num, qc_idx, __func__);
-                       goto out;
+                       break;
                }
-
                if (unlikely(retiring)) {       /* user has reduced max_queue */
                        int k, retval;
 
                        retval = atomic_read(&retired_max_queue);
                        if (qc_idx >= retval) {
                                pr_err("index %d too large\n", retval);
-                               goto out;
+                               break;
                        }
                        k = find_last_bit(sqp->in_use_bm, retval);
                        if ((k < sdebug_max_queue) || (k == retval))
@@ -7300,17 +7339,18 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
                        else
                                atomic_set(&retired_max_queue, k + 1);
                }
+               sd_dp->defer_t = SDEB_DEFER_NONE;
                spin_unlock_irqrestore(&sqp->qc_lock, iflags);
                scp->scsi_done(scp); /* callback to mid level */
+               spin_lock_irqsave(&sqp->qc_lock, iflags);
                num_entries++;
-       } while (1);
-
-out:
+       }
        spin_unlock_irqrestore(&sqp->qc_lock, iflags);
+       if (num_entries > 0)
+               atomic_add(num_entries, &sdeb_mq_poll_count);
        return num_entries;
 }
 
-
 static int scsi_debug_queuecommand(struct Scsi_Host *shost,
                                   struct scsi_cmnd *scp)
 {