From 691eb7c6110fe04e38bf0f60d713482508b3d199 Mon Sep 17 00:00:00 2001
From: Kashyap Desai
Date: Fri, 9 Jun 2023 04:01:47 -0700
Subject: [PATCH] RDMA/bnxt_re: handle command completions after driver detects a timeout

If the calling context detects a command timeout, the associated memory
stored on the stack is no longer valid. If the firmware completes the
same command later, this causes an incorrect memory access by the
driver.

Add is_waiter_alive to handle delayed completion by the firmware.
is_waiter_alive is set and reset under the command queue lock.

Signed-off-by: Kashyap Desai
Signed-off-by: Selvin Xavier
Link: https://lore.kernel.org/r/1686308514-11996-11-git-send-email-selvin.xavier@broadcom.com
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 59 +++++++++++++++++------------
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.h |  1 +
 2 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 3215f8a..4e5f66e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -105,7 +105,6 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
 {
         struct bnxt_qplib_cmdq_ctx *cmdq;
         u16 cbit;
-        int ret;
 
         cmdq = &rcfw->cmdq;
         cbit = cookie % rcfw->cmdq_depth;
@@ -115,9 +114,9 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
                 return bnxt_qplib_map_rc(opcode);
 
         /* Non zero means command completed */
-        ret = wait_event_timeout(cmdq->waitq,
-                                 !test_bit(cbit, cmdq->cmdq_bitmap),
-                                 msecs_to_jiffies(10000));
+        wait_event_timeout(cmdq->waitq,
+                           !test_bit(cbit, cmdq->cmdq_bitmap),
+                           msecs_to_jiffies(10000));
         if (!test_bit(cbit, cmdq->cmdq_bitmap))
                 return 0;
 
@@ -170,7 +169,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
 static int __send_message(struct bnxt_qplib_rcfw *rcfw,
                           struct bnxt_qplib_cmdqmsg *msg)
 {
-        u32 bsize, opcode, free_slots, required_slots;
+        u32 bsize, free_slots, required_slots;
         struct bnxt_qplib_cmdq_ctx *cmdq;
         struct bnxt_qplib_crsqe *crsqe;
         struct bnxt_qplib_cmdqe *cmdqe;
@@ -185,8 +184,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
         hwq = &cmdq->hwq;
         pdev = rcfw->pdev;
 
-        opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
-
         if (test_bit(FIRMWARE_TIMED_OUT, &cmdq->flags))
                 return -ETIMEDOUT;
 
@@ -216,6 +213,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
         crsqe->free_slots = free_slots;
         crsqe->resp = (struct creq_qp_event *)msg->resp;
         crsqe->resp->cookie = cpu_to_le16(cookie);
+        crsqe->is_waiter_alive = true;
         crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
         if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) {
                 struct bnxt_qplib_rcfw_sbuf *sbuf = msg->sb;
@@ -347,7 +345,9 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
                                           struct bnxt_qplib_cmdqmsg *msg)
 {
         struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
-        u16 cookie;
+        struct bnxt_qplib_crsqe *crsqe;
+        unsigned long flags;
+        u16 cookie, cbit;
         int rc = 0;
         u8 opcode;
 
@@ -363,6 +363,7 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
 
         cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz)) &
                                 RCFW_MAX_COOKIE_VALUE;
+        cbit = cookie % rcfw->cmdq_depth;
 
         if (msg->block)
                 rc = __block_for_resp(rcfw, cookie, opcode);
@@ -378,6 +379,14 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
                 return rc;
         }
 
+        if (rc) {
+                spin_lock_irqsave(&rcfw->cmdq.hwq.lock, flags);
+                crsqe = &rcfw->crsqe_tbl[cbit];
+                crsqe->is_waiter_alive = false;
+                spin_unlock_irqrestore(&rcfw->cmdq.hwq.lock, flags);
+                return -ETIMEDOUT;
+        }
+
         if (evnt->status) {
                 /* failed with status */
                 dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
@@ -480,15 +489,15 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
         struct creq_qp_error_notification *err_event;
         struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq;
         struct bnxt_qplib_crsqe *crsqe;
+        u32 qp_id, tbl_indx, req_size;
         struct bnxt_qplib_qp *qp;
         u16 cbit, blocked = 0;
+        bool is_waiter_alive;
         struct pci_dev *pdev;
         unsigned long flags;
         u32 wait_cmds = 0;
-        __le16 mcookie;
         u16 cookie;
         int rc = 0;
-        u32 qp_id, tbl_indx;
 
         pdev = rcfw->pdev;
         switch (qp_event->event) {
@@ -520,31 +529,29 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
                 spin_lock_irqsave_nested(&hwq->lock, flags,
                                          SINGLE_DEPTH_NESTING);
                 cookie = le16_to_cpu(qp_event->cookie);
-                mcookie = qp_event->cookie;
                 blocked = cookie & RCFW_CMD_IS_BLOCKING;
                 cookie &= RCFW_MAX_COOKIE_VALUE;
                 cbit = cookie % rcfw->cmdq_depth;
                 crsqe = &rcfw->crsqe_tbl[cbit];
 
-                if (crsqe->resp &&
-                    crsqe->resp->cookie == mcookie) {
-                        memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
-                        crsqe->resp = NULL;
-                } else {
-                        if (crsqe->resp && crsqe->resp->cookie)
-                                dev_err(&pdev->dev,
-                                        "CMD %s cookie sent=%#x, recd=%#x\n",
-                                        crsqe->resp ? "mismatch" : "collision",
-                                        crsqe->resp ? crsqe->resp->cookie : 0,
-                                        mcookie);
-                }
                 if (!test_and_clear_bit(cbit, rcfw->cmdq.cmdq_bitmap))
                         dev_warn(&pdev->dev, "CMD bit %d was not requested\n",
                                  cbit);
-                hwq->cons += crsqe->req_size;
+
+                if (crsqe->is_waiter_alive) {
+                        if (crsqe->resp)
+                                memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+                        if (!blocked)
+                                wait_cmds++;
+                }
+
+                req_size = crsqe->req_size;
+                is_waiter_alive = crsqe->is_waiter_alive;
+                crsqe->req_size = 0;
+                if (!is_waiter_alive)
+                        crsqe->resp = NULL;
 
-                if (!blocked)
-                        wait_cmds++;
+                hwq->cons += req_size;
                 spin_unlock_irqrestore(&hwq->lock, flags);
         }
         *num_wait += wait_cmds;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 089e616..6ed81c1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -152,6 +152,7 @@ struct bnxt_qplib_crsqe {
         u32 req_size;
         /* Free slots at the time of submission */
         u32 free_slots;
+        bool is_waiter_alive;
 };
 
 struct bnxt_qplib_rcfw_sbuf {
-- 
2.7.4
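
Note: for readers less familiar with the pattern this patch applies, below is a
minimal, self-contained user-space sketch of the same idea: the waiter publishes
an "alive" flag under the queue lock before sleeping, clears it under that lock
on timeout, and the completion path copies into the waiter's stack buffer only
while the flag is still set. The names (cmd_slot, submit_and_wait, complete_cmd)
and the pthread-based locking are illustrative assumptions only; they are not
part of the bnxt_re driver.

#include <pthread.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>

struct cmd_resp { int status; };

struct cmd_slot {
        pthread_mutex_t lock;           /* stands in for the command queue lock */
        pthread_cond_t done;
        struct cmd_resp *resp;          /* points at the waiter's stack buffer */
        bool waiter_alive;              /* waiter still expects this completion */
        bool completed;
};

/* Caller side: resp lives on the caller's stack, as in the driver. */
static int submit_and_wait(struct cmd_slot *slot, struct cmd_resp *resp,
                           const struct timespec *deadline)
{
        int rc = 0;

        pthread_mutex_lock(&slot->lock);
        slot->resp = resp;
        slot->waiter_alive = true;      /* set under the queue lock */
        slot->completed = false;
        /* ... command submission to the device would happen here ... */
        while (!slot->completed && rc == 0)
                rc = pthread_cond_timedwait(&slot->done, &slot->lock, deadline);
        if (slot->completed) {
                rc = 0;
        } else {
                /*
                 * Timed out: mark the waiter dead under the same lock, so a
                 * late completion never writes into the stale stack buffer.
                 */
                slot->waiter_alive = false;
                rc = -1;                /* the driver returns -ETIMEDOUT */
        }
        pthread_mutex_unlock(&slot->lock);
        return rc;
}

/* Completion side: copy out only while the waiter is known to be alive. */
static void complete_cmd(struct cmd_slot *slot, const struct cmd_resp *event)
{
        pthread_mutex_lock(&slot->lock);
        if (slot->waiter_alive) {
                if (slot->resp)
                        memcpy(slot->resp, event, sizeof(*event));
        } else {
                slot->resp = NULL;      /* drop the late completion */
        }
        slot->completed = true;
        pthread_cond_signal(&slot->done);
        pthread_mutex_unlock(&slot->lock);
}

Because the flag is flipped only while holding the same lock the completion path
takes, a firmware response that arrives after the timeout can never dereference
the caller's已-freed stack memory; it is simply dropped, which mirrors what the
is_waiter_alive handling above does in bnxt_qplib_process_qp_event().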