scsi: target: tcmu: Fix and simplify timeout handling
author Bodo Stroesser <bstroesser@ts.fujitsu.com>
Sun, 26 Jul 2020 15:35:08 +0000 (17:35 +0200)
committer Martin K. Petersen <martin.petersen@oracle.com>
Wed, 29 Jul 2020 02:25:29 +0000 (22:25 -0400)
During cmd timeout handling in check_timedout_devices(), a race can cause
tcmu_set_next_deadline() to not start a timer as expected (a standalone
sketch of the sequence follows the list below):

 1) Either tcmu_check_expired_ring_cmd() checks the inflight_queue or
    tcmu_check_expired_queue_cmd() checks the qfull_queue while jiffies has
    the value X

 2) At the end of the check, the queue still contains one remaining command
    with deadline X; time_after(X, X) is false, so that command is not
    treated as timed out.

 3) After tcmu_check_expired_xxxxx_cmd() a timer interrupt happens and
    jiffies is incremented to X+1.

 4) Now tcmu_set_next_deadline() is called, but it skips the command, since
    time_after(X+1, X) is true. Therefore tcmu_set_next_deadline() finds no
    new deadline and stops the timer, which it shouldn't.
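
The following standalone sketch (userspace C, not driver code; the
time_after() macro is reproduced here with the same semantics as the kernel
helper in include/linux/jiffies.h, and X is an arbitrary jiffies value)
walks through steps 1) - 4) above:

  #include <stdio.h>

  /* same comparison semantics as the kernel's time_after() */
  #define time_after(a, b)  ((long)((b) - (a)) < 0)

  int main(void)
  {
          unsigned long X = 1000;      /* deadline of the remaining command */
          unsigned long jiffies = X;   /* steps 1)+2): check runs while jiffies == X */

          /* old expiry check: time_after(X, X) is false, cmd is NOT timed out */
          printf("expired while jiffies == X:   %d\n", time_after(jiffies, X));

          jiffies++;                   /* step 3): timer tick, jiffies == X+1 */

          /*
           * Step 4): the old tcmu_set_next_deadline() skipped commands whose
           * deadline had already passed, so time_after(X+1, X) being true
           * meant no timer was armed for a command that never timed out.
           */
          printf("skipped while jiffies == X+1: %d\n", time_after(jiffies, X));
          return 0;
  }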

Since commands that time out are removed from inflight_queue or
qfull_queue, we don't need the check with time_after() in
tcmu_set_next_deadline() but can use the deadline from the first cmd in
the queue.

Additionally, replace the remaining time_after() calls in
tcmu_check_expired_xxxxx_cmd() with time_after_eq(): it makes no sense to
arm the timer with the deadline itself and then require jiffies to be
strictly greater than that deadline before treating the command as
expired.
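
A minimal illustration of that boundary case (again a userspace sketch, not
driver code; the macros mirror include/linux/jiffies.h): when the timer
armed via mod_timer(timer, deadline) fires, jiffies equals the deadline,
and only the _eq variant reports the command as expired.

  #include <stdio.h>

  #define time_after(a, b)     ((long)((b) - (a)) < 0)
  #define time_after_eq(a, b)  ((long)((a) - (b)) >= 0)

  int main(void)
  {
          unsigned long deadline = 1000;
          unsigned long jiffies = deadline;  /* the moment the cmd timer fires */

          printf("time_after:    %d\n", time_after(jiffies, deadline));    /* 0: missed  */
          printf("time_after_eq: %d\n", time_after_eq(jiffies, deadline)); /* 1: expired */
          return 0;
  }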

Simplify the end of tcmu_handle_completions() and change the check for no
more pending commands from

  mb->cmd_tail == mb->cmd_head

to

  idr_is_empty(&udev->commands)

because the old check does not work correctly if the ring contains padding
entries or, in the future, TMRs.

Finally, tcmu_set_next_deadline() was moved up in the source file in
preparation for the later implementation of the tmr_notify callback.

Link: https://lore.kernel.org/r/20200726153510.13077-7-bstroesser@ts.fujitsu.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Bodo Stroesser <bstroesser@ts.fujitsu.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/target/target_core_user.c

index eb68c5f..bddd40f 100644
@@ -1129,6 +1129,18 @@ tcmu_queue_cmd(struct se_cmd *se_cmd)
        return scsi_ret;
 }
 
+static void tcmu_set_next_deadline(struct list_head *queue,
+                                  struct timer_list *timer)
+{
+       struct tcmu_cmd *cmd;
+
+       if (!list_empty(queue)) {
+               cmd = list_first_entry(queue, struct tcmu_cmd, queue_entry);
+               mod_timer(timer, cmd->deadline);
+       } else
+               del_timer(timer);
+}
+
 static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *entry)
 {
        struct se_cmd *se_cmd = cmd->se_cmd;
@@ -1196,25 +1208,6 @@ out:
        tcmu_free_cmd(cmd);
 }
 
-static void tcmu_set_next_deadline(struct list_head *queue,
-                                  struct timer_list *timer)
-{
-       struct tcmu_cmd *tcmu_cmd, *tmp_cmd;
-       unsigned long deadline = 0;
-
-       list_for_each_entry_safe(tcmu_cmd, tmp_cmd, queue, queue_entry) {
-               if (!time_after(jiffies, tcmu_cmd->deadline)) {
-                       deadline = tcmu_cmd->deadline;
-                       break;
-               }
-       }
-
-       if (deadline)
-               mod_timer(timer, deadline);
-       else
-               del_timer(timer);
-}
-
 static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 {
        struct tcmu_mailbox *mb;
@@ -1267,22 +1260,16 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
                handled++;
        }
 
-       if (mb->cmd_tail == mb->cmd_head) {
-               /* no more pending commands */
-               del_timer(&udev->cmd_timer);
-
-               if (list_empty(&udev->qfull_queue)) {
-                       /*
-                        * no more pending or waiting commands so try to
-                        * reclaim blocks if needed.
-                        */
-                       if (atomic_read(&global_db_count) >
-                           tcmu_global_max_blocks)
-                               schedule_delayed_work(&tcmu_unmap_work, 0);
-               }
-       } else if (udev->cmd_time_out) {
-               tcmu_set_next_deadline(&udev->inflight_queue, &udev->cmd_timer);
+       if (atomic_read(&global_db_count) > tcmu_global_max_blocks &&
+           idr_is_empty(&udev->commands) && list_empty(&udev->qfull_queue)) {
+               /*
+                * Allocated blocks exceeded global block limit, currently no
+                * more pending or waiting commands so try to reclaim blocks.
+                */
+               schedule_delayed_work(&tcmu_unmap_work, 0);
        }
+       if (udev->cmd_time_out)
+               tcmu_set_next_deadline(&udev->inflight_queue, &udev->cmd_timer);
 
        return handled;
 }
@@ -1291,7 +1278,7 @@ static void tcmu_check_expired_ring_cmd(struct tcmu_cmd *cmd)
 {
        struct se_cmd *se_cmd;
 
-       if (!time_after(jiffies, cmd->deadline))
+       if (!time_after_eq(jiffies, cmd->deadline))
                return;
 
        set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
@@ -1310,7 +1297,7 @@ static void tcmu_check_expired_queue_cmd(struct tcmu_cmd *cmd)
 {
        struct se_cmd *se_cmd;
 
-       if (!time_after(jiffies, cmd->deadline))
+       if (!time_after_eq(jiffies, cmd->deadline))
                return;
 
        pr_debug("Timing out queued cmd %p on dev %s.\n",