RDMA/bnxt_re: handle command completions after driver detect a timedout
authorKashyap Desai <kashyap.desai@broadcom.com>
Fri, 9 Jun 2023 11:01:47 +0000 (04:01 -0700)
committerLeon Romanovsky <leon@kernel.org>
Mon, 12 Jun 2023 07:10:17 +0000 (10:10 +0300)
If calling context detect command timeout, associated memory stored on
stack will not be valid. If firmware complete the same command later,
this causes incorrect memory access by driver.

Added is_waiter_alive to handle delayed completion by firmware.
is_waiter_alive is set and reset under command queue lock.

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Link: https://lore.kernel.org/r/1686308514-11996-11-git-send-email-selvin.xavier@broadcom.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
drivers/infiniband/hw/bnxt_re/qplib_rcfw.h

index 3215f8a..4e5f66e 100644 (file)
@@ -105,7 +105,6 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
 {
        struct bnxt_qplib_cmdq_ctx *cmdq;
        u16 cbit;
-       int ret;
 
        cmdq = &rcfw->cmdq;
        cbit = cookie % rcfw->cmdq_depth;
@@ -115,9 +114,9 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
                        return bnxt_qplib_map_rc(opcode);
 
                /* Non zero means command completed */
-               ret = wait_event_timeout(cmdq->waitq,
-                                        !test_bit(cbit, cmdq->cmdq_bitmap),
-                                        msecs_to_jiffies(10000));
+               wait_event_timeout(cmdq->waitq,
+                                  !test_bit(cbit, cmdq->cmdq_bitmap),
+                                  msecs_to_jiffies(10000));
 
                if (!test_bit(cbit, cmdq->cmdq_bitmap))
                        return 0;
@@ -170,7 +169,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
 static int __send_message(struct bnxt_qplib_rcfw *rcfw,
                          struct bnxt_qplib_cmdqmsg *msg)
 {
-       u32 bsize, opcode, free_slots, required_slots;
+       u32 bsize, free_slots, required_slots;
        struct bnxt_qplib_cmdq_ctx *cmdq;
        struct bnxt_qplib_crsqe *crsqe;
        struct bnxt_qplib_cmdqe *cmdqe;
@@ -185,8 +184,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
        hwq = &cmdq->hwq;
        pdev = rcfw->pdev;
 
-       opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
-
        if (test_bit(FIRMWARE_TIMED_OUT, &cmdq->flags))
                return -ETIMEDOUT;
 
@@ -216,6 +213,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
        crsqe->free_slots = free_slots;
        crsqe->resp = (struct creq_qp_event *)msg->resp;
        crsqe->resp->cookie = cpu_to_le16(cookie);
+       crsqe->is_waiter_alive = true;
        crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
        if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) {
                struct bnxt_qplib_rcfw_sbuf *sbuf = msg->sb;
@@ -347,7 +345,9 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
                                          struct bnxt_qplib_cmdqmsg *msg)
 {
        struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
-       u16 cookie;
+       struct bnxt_qplib_crsqe *crsqe;
+       unsigned long flags;
+       u16 cookie, cbit;
        int rc = 0;
        u8 opcode;
 
@@ -363,6 +363,7 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 
        cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz))
                                & RCFW_MAX_COOKIE_VALUE;
+       cbit = cookie % rcfw->cmdq_depth;
 
        if (msg->block)
                rc = __block_for_resp(rcfw, cookie, opcode);
@@ -378,6 +379,14 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
                return rc;
        }
 
+       if (rc) {
+               spin_lock_irqsave(&rcfw->cmdq.hwq.lock, flags);
+               crsqe = &rcfw->crsqe_tbl[cbit];
+               crsqe->is_waiter_alive = false;
+               spin_unlock_irqrestore(&rcfw->cmdq.hwq.lock, flags);
+               return -ETIMEDOUT;
+       }
+
        if (evnt->status) {
                /* failed with status */
                dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
@@ -480,15 +489,15 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
        struct creq_qp_error_notification *err_event;
        struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq;
        struct bnxt_qplib_crsqe *crsqe;
+       u32 qp_id, tbl_indx, req_size;
        struct bnxt_qplib_qp *qp;
        u16 cbit, blocked = 0;
+       bool is_waiter_alive;
        struct pci_dev *pdev;
        unsigned long flags;
        u32 wait_cmds = 0;
-       __le16  mcookie;
        u16 cookie;
        int rc = 0;
-       u32 qp_id, tbl_indx;
 
        pdev = rcfw->pdev;
        switch (qp_event->event) {
@@ -520,31 +529,29 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
                spin_lock_irqsave_nested(&hwq->lock, flags,
                                         SINGLE_DEPTH_NESTING);
                cookie = le16_to_cpu(qp_event->cookie);
-               mcookie = qp_event->cookie;
                blocked = cookie & RCFW_CMD_IS_BLOCKING;
                cookie &= RCFW_MAX_COOKIE_VALUE;
                cbit = cookie % rcfw->cmdq_depth;
                crsqe = &rcfw->crsqe_tbl[cbit];
-               if (crsqe->resp &&
-                   crsqe->resp->cookie  == mcookie) {
-                       memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
-                       crsqe->resp = NULL;
-               } else {
-                       if (crsqe->resp && crsqe->resp->cookie)
-                               dev_err(&pdev->dev,
-                                       "CMD %s cookie sent=%#x, recd=%#x\n",
-                                       crsqe->resp ? "mismatch" : "collision",
-                                       crsqe->resp ? crsqe->resp->cookie : 0,
-                                       mcookie);
-               }
                if (!test_and_clear_bit(cbit, rcfw->cmdq.cmdq_bitmap))
                        dev_warn(&pdev->dev,
                                 "CMD bit %d was not requested\n", cbit);
-               hwq->cons += crsqe->req_size;
+
+               if (crsqe->is_waiter_alive) {
+                       if (crsqe->resp)
+                               memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+                       if (!blocked)
+                               wait_cmds++;
+               }
+
+               req_size = crsqe->req_size;
+               is_waiter_alive = crsqe->is_waiter_alive;
+
                crsqe->req_size = 0;
+               if (!is_waiter_alive)
+                       crsqe->resp = NULL;
 
-               if (!blocked)
-                       wait_cmds++;
+               hwq->cons += req_size;
                spin_unlock_irqrestore(&hwq->lock, flags);
        }
        *num_wait += wait_cmds;
index 089e616..6ed81c1 100644 (file)
@@ -152,6 +152,7 @@ struct bnxt_qplib_crsqe {
        u32                     req_size;
        /* Free slots at the time of submission */
        u32                     free_slots;
+       bool                    is_waiter_alive;
 };
 
 struct bnxt_qplib_rcfw_sbuf {