scsi: qedi: Fix use after free during abort cleanup
authorMike Christie <michael.christie@oracle.com>
Tue, 25 May 2021 18:18:14 +0000 (13:18 -0500)
committerMartin K. Petersen <martin.petersen@oracle.com>
Wed, 2 Jun 2021 05:28:22 +0000 (01:28 -0400)
If qedi_tmf_work's qedi_wait_for_cleanup_request call times out we will
also force the clean up of the qedi_work_map but
qedi_process_cmd_cleanup_resp could still be accessing the qedi_cmd.

To fix this issue we extend where we hold the tmf_work_lock and back_lock
so the qedi_process_cmd_cleanup_resp access is serialized with the cleanup
done in qedi_tmf_work and any completion handling for the iscsi_task.

Link: https://lore.kernel.org/r/20210525181821.7617-22-michael.christie@oracle.com
Reviewed-by: Manish Rangankar <mrangankar@marvell.com>
Signed-off-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/qedi/qedi_fw.c
drivers/scsi/qedi/qedi_iscsi.h

index c12bb2d..61c1ebb 100644 (file)
@@ -729,7 +729,6 @@ static void qedi_process_nopin_local_cmpl(struct qedi_ctx *qedi,
 
 static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
                                          struct iscsi_cqe_solicited *cqe,
-                                         struct iscsi_task *task,
                                          struct iscsi_conn *conn)
 {
        struct qedi_work_map *work, *work_tmp;
@@ -741,7 +740,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
        u32 iscsi_cid;
        struct qedi_conn *qedi_conn;
        struct qedi_cmd *dbg_cmd;
-       struct iscsi_task *mtask;
+       struct iscsi_task *mtask, *task;
        struct iscsi_tm *tmf_hdr = NULL;
 
        iscsi_cid = cqe->conn_id;
@@ -767,6 +766,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
                        }
                        found = 1;
                        mtask = qedi_cmd->task;
+                       task = work->ctask;
                        tmf_hdr = (struct iscsi_tm *)mtask->hdr;
 
                        list_del_init(&work->list);
@@ -774,52 +774,47 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
                        qedi_cmd->list_tmf_work = NULL;
                }
        }
-       spin_unlock_bh(&qedi_conn->tmf_work_lock);
-
-       if (found) {
-               QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
-                         "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n",
-                         proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id);
-
-               if ((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) ==
-                   ISCSI_TM_FUNC_ABORT_TASK) {
-                       spin_lock_bh(&conn->session->back_lock);
-
-                       protoitt = build_itt(get_itt(tmf_hdr->rtt),
-                                            conn->session->age);
-                       task = iscsi_itt_to_task(conn, protoitt);
 
-                       spin_unlock_bh(&conn->session->back_lock);
+       if (!found) {
+               spin_unlock_bh(&qedi_conn->tmf_work_lock);
+               goto check_cleanup_reqs;
+       }
 
-                       if (!task) {
-                               QEDI_NOTICE(&qedi->dbg_ctx,
-                                           "IO task completed, tmf rtt=0x%x, cid=0x%x\n",
-                                           get_itt(tmf_hdr->rtt),
-                                           qedi_conn->iscsi_conn_id);
-                               return;
-                       }
+       QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
+                 "TMF work, cqe->tid=0x%x, tmf flags=0x%x, cid=0x%x\n",
+                 proto_itt, tmf_hdr->flags, qedi_conn->iscsi_conn_id);
+
+       spin_lock_bh(&conn->session->back_lock);
+       if (iscsi_task_is_completed(task)) {
+               QEDI_NOTICE(&qedi->dbg_ctx,
+                           "IO task completed, tmf rtt=0x%x, cid=0x%x\n",
+                          get_itt(tmf_hdr->rtt), qedi_conn->iscsi_conn_id);
+               goto unlock;
+       }
 
-                       dbg_cmd = task->dd_data;
+       dbg_cmd = task->dd_data;
 
-                       QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
-                                 "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o tid=0x%x, cid=0x%x\n",
-                                 get_itt(tmf_hdr->rtt), get_itt(task->itt),
-                                 dbg_cmd->task_id, qedi_conn->iscsi_conn_id);
+       QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
+                 "Abort tmf rtt=0x%x, i/o itt=0x%x, i/o tid=0x%x, cid=0x%x\n",
+                 get_itt(tmf_hdr->rtt), get_itt(task->itt), dbg_cmd->task_id,
+                 qedi_conn->iscsi_conn_id);
 
-                       if (qedi_cmd->state == CLEANUP_WAIT_FAILED)
-                               qedi_cmd->state = CLEANUP_RECV;
+       spin_lock(&qedi_conn->list_lock);
+       if (likely(dbg_cmd->io_cmd_in_list)) {
+               dbg_cmd->io_cmd_in_list = false;
+               list_del_init(&dbg_cmd->io_cmd);
+               qedi_conn->active_cmd_count--;
+       }
+       spin_unlock(&qedi_conn->list_lock);
+       qedi_cmd->state = CLEANUP_RECV;
+unlock:
+       spin_unlock_bh(&conn->session->back_lock);
+       spin_unlock_bh(&qedi_conn->tmf_work_lock);
+       wake_up_interruptible(&qedi_conn->wait_queue);
+       return;
 
-                       spin_lock(&qedi_conn->list_lock);
-                       if (likely(dbg_cmd->io_cmd_in_list)) {
-                               dbg_cmd->io_cmd_in_list = false;
-                               list_del_init(&dbg_cmd->io_cmd);
-                               qedi_conn->active_cmd_count--;
-                       }
-                       spin_unlock(&qedi_conn->list_lock);
-                       qedi_cmd->state = CLEANUP_RECV;
-                       wake_up_interruptible(&qedi_conn->wait_queue);
-               }
-       } else if (qedi_conn->cmd_cleanup_req > 0) {
+check_cleanup_reqs:
+       if (qedi_conn->cmd_cleanup_req > 0) {
                spin_lock_bh(&conn->session->back_lock);
                qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt);
                protoitt = build_itt(ptmp_itt, conn->session->age);
@@ -842,6 +837,7 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
                QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_TID,
                          "Freeing tid=0x%x for cid=0x%x\n",
                          cqe->itid, qedi_conn->iscsi_conn_id);
+               qedi_clear_task_idx(qedi_conn->qedi, cqe->itid);
 
        } else {
                qedi_get_proto_itt(qedi, cqe->itid, &ptmp_itt);
@@ -944,8 +940,7 @@ void qedi_fp_process_cqes(struct qedi_work *work)
                goto exit_fp_process;
        case ISCSI_CQE_TYPE_TASK_CLEANUP:
                QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, "CleanUp CqE\n");
-               qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited, task,
-                                             conn);
+               qedi_process_cmd_cleanup_resp(qedi, &cqe->cqe_solicited, conn);
                goto exit_fp_process;
        default:
                QEDI_ERR(&qedi->dbg_ctx, "Error cqe.\n");
@@ -1372,11 +1367,25 @@ static void qedi_tmf_work(struct work_struct *work)
        tmf_hdr = (struct iscsi_tm *)mtask->hdr;
        set_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
 
-       ctask = iscsi_itt_to_task(conn, tmf_hdr->rtt);
-       if (!ctask || !ctask->sc) {
-               QEDI_ERR(&qedi->dbg_ctx, "Task already completed\n");
-               goto abort_ret;
+       spin_lock_bh(&conn->session->back_lock);
+       ctask = iscsi_itt_to_ctask(conn, tmf_hdr->rtt);
+       if (!ctask) {
+               spin_unlock_bh(&conn->session->back_lock);
+               QEDI_ERR(&qedi->dbg_ctx, "Invalid RTT. Letting abort timeout.\n");
+               goto clear_cleanup;
+       }
+
+       if (iscsi_task_is_completed(ctask)) {
+               spin_unlock_bh(&conn->session->back_lock);
+               QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+                         "Task already completed\n");
+               /*
+                * We have to still send the TMF because libiscsi needs the
+                * response to avoid a timeout.
+                */
+               goto send_tmf;
        }
+       spin_unlock_bh(&conn->session->back_lock);
 
        cmd = (struct qedi_cmd *)ctask->dd_data;
        QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
@@ -1387,19 +1396,20 @@ static void qedi_tmf_work(struct work_struct *work)
        if (qedi_do_not_recover) {
                QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n",
                         qedi_do_not_recover);
-               goto abort_ret;
+               goto clear_cleanup;
        }
 
        list_work = kzalloc(sizeof(*list_work), GFP_ATOMIC);
        if (!list_work) {
                QEDI_ERR(&qedi->dbg_ctx, "Memory allocation failed\n");
-               goto abort_ret;
+               goto clear_cleanup;
        }
 
        qedi_cmd->type = TYPEIO;
        list_work->qedi_cmd = qedi_cmd;
        list_work->rtid = cmd->task_id;
        list_work->state = QEDI_WORK_SCHEDULED;
+       list_work->ctask = ctask;
        qedi_cmd->list_tmf_work = list_work;
 
        QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
@@ -1422,6 +1432,7 @@ static void qedi_tmf_work(struct work_struct *work)
                goto ldel_exit;
        }
 
+send_tmf:
        tid = qedi_get_task_idx(qedi);
        if (tid == -1) {
                QEDI_ERR(&qedi->dbg_ctx, "Invalid tid, cid=0x%x\n",
@@ -1432,7 +1443,7 @@ static void qedi_tmf_work(struct work_struct *work)
        qedi_cmd->task_id = tid;
        qedi_send_iscsi_tmf(qedi_conn, qedi_cmd->task);
 
-abort_ret:
+clear_cleanup:
        clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
        return;
 
index 39dc27c..68ef519 100644 (file)
@@ -212,6 +212,7 @@ struct qedi_cmd {
 struct qedi_work_map {
        struct list_head list;
        struct qedi_cmd *qedi_cmd;
+       struct iscsi_task *ctask;
        int rtid;
 
        int state;