From 989a1780698c65dfe093a6aa89ceeff84c31f528 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 29 Apr 2008 13:46:51 -0700 Subject: [PATCH] RDMA/cxgb3: Correctly serialize peer abort path Open MPI and other stress testing exposed a few bad bugs in handling aborts in the middle of a normal close. Fix these by: - serializing abort reply and peer abort processing with disconnect processing - warning (and ignoring) if ep timer is stopped when it wasn't running - cleaning up disconnect path to correctly deal with aborting and dead endpoints - in iwch_modify_qp(), taking a ref on the ep before releasing the qp lock if iwch_ep_disconnect() will be called. The ref is dropped after calling disconnect. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 100 ++++++++++++++++++++++------------ drivers/infiniband/hw/cxgb3/iwch_cm.h | 1 + drivers/infiniband/hw/cxgb3/iwch_qp.c | 6 +- 3 files changed, 72 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 72ca360..0b515d8 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -125,6 +125,12 @@ static void start_ep_timer(struct iwch_ep *ep) static void stop_ep_timer(struct iwch_ep *ep) { PDBG("%s ep %p\n", __func__, ep); + if (!timer_pending(&ep->timer)) { + printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n", + __func__, ep, ep->com.state); + WARN_ON(1); + return; + } del_timer_sync(&ep->timer); put_ep(&ep->com); } @@ -1083,8 +1089,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) { struct iwch_ep *ep = ctx; + unsigned long flags; + int release = 0; PDBG("%s ep %p\n", __func__, ep); + BUG_ON(!ep); /* * We get 2 abort replies from the HW. The first one must @@ -1095,9 +1104,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } - close_complete_upcall(ep); - state_set(&ep->com, DEAD); - release_ep_resources(ep); + spin_lock_irqsave(&ep->com.lock, flags); + switch (ep->com.state) { + case ABORTING: + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + break; + default: + printk(KERN_ERR "%s ep %p state %d\n", + __func__, ep, ep->com.state); + break; + } + spin_unlock_irqrestore(&ep->com.lock, flags); + + if (release) + release_ep_resources(ep); return CPL_RET_BUF_DONE; } @@ -1470,7 +1492,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct sk_buff *rpl_skb; struct iwch_qp_attributes attrs; int ret; - int state; + int release = 0; + unsigned long flags; if (is_neg_adv_abort(req->status)) { PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep, @@ -1488,9 +1511,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) return CPL_RET_BUF_DONE; } - state = state_read(&ep->com); - PDBG("%s ep %p state %u\n", __func__, ep, state); - switch (state) { + spin_lock_irqsave(&ep->com.lock, flags); + PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state); + switch (ep->com.state) { case CONNECTING: break; case MPA_REQ_WAIT: @@ -1536,21 +1559,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) break; case DEAD: PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); + spin_unlock_irqrestore(&ep->com.lock, flags); return CPL_RET_BUF_DONE; default: BUG_ON(1); break; } dst_confirm(ep->dst); + if (ep->com.state != ABORTING) { + __state_set(&ep->com, DEAD); + release = 1; + } + spin_unlock_irqrestore(&ep->com.lock, flags); rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); if (!rpl_skb) { printk(KERN_ERR MOD "%s - cannot allocate skb!\n", __func__); - dst_release(ep->dst); - l2t_release(L2DATA(ep->com.tdev), ep->l2t); - put_ep(&ep->com); - return CPL_RET_BUF_DONE; + release = 1; + goto out; } rpl_skb->priority = CPL_PRIORITY_DATA; rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); @@ -1559,10 +1586,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); rpl->cmd = CPL_ABORT_NO_RST; cxgb3_ofld_send(ep->com.tdev, rpl_skb); - if (state != ABORTING) { - state_set(&ep->com, DEAD); +out: + if (release) release_ep_resources(ep); - } return CPL_RET_BUF_DONE; } @@ -1661,15 +1687,18 @@ static void ep_timeout(unsigned long arg) struct iwch_ep *ep = (struct iwch_ep *)arg; struct iwch_qp_attributes attrs; unsigned long flags; + int abort = 1; spin_lock_irqsave(&ep->com.lock, flags); PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, ep->com.state); switch (ep->com.state) { case MPA_REQ_SENT: + __state_set(&ep->com, ABORTING); connect_reply_upcall(ep, -ETIMEDOUT); break; case MPA_REQ_WAIT: + __state_set(&ep->com, ABORTING); break; case CLOSING: case MORIBUND: @@ -1679,13 +1708,17 @@ static void ep_timeout(unsigned long arg) ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); } + __state_set(&ep->com, ABORTING); break; default: - BUG(); + printk(KERN_ERR "%s unexpected state ep %p state %u\n", + __func__, ep, ep->com.state); + WARN_ON(1); + abort = 0; } - __state_set(&ep->com, CLOSING); spin_unlock_irqrestore(&ep->com.lock, flags); - abort_connection(ep, NULL, GFP_ATOMIC); + if (abort) + abort_connection(ep, NULL, GFP_ATOMIC); put_ep(&ep->com); } @@ -1968,40 +2001,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp) PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, states[ep->com.state], abrupt); - if (ep->com.state == DEAD) { - PDBG("%s already dead ep %p\n", __func__, ep); - goto out; - } - - if (abrupt) { - if (ep->com.state != ABORTING) { - ep->com.state = ABORTING; - close = 1; - } - goto out; - } - switch (ep->com.state) { case MPA_REQ_WAIT: case MPA_REQ_SENT: case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: - start_ep_timer(ep); - ep->com.state = CLOSING; close = 1; + if (abrupt) + ep->com.state = ABORTING; + else { + ep->com.state = CLOSING; + start_ep_timer(ep); + } break; case CLOSING: - ep->com.state = MORIBUND; close = 1; + if (abrupt) { + stop_ep_timer(ep); + ep->com.state = ABORTING; + } else + ep->com.state = MORIBUND; break; case MORIBUND: + case ABORTING: + case DEAD: + PDBG("%s ignoring disconnect ep %p state %u\n", + __func__, ep, ep->com.state); break; default: BUG(); break; } -out: + spin_unlock_irqrestore(&ep->com.lock, flags); if (close) { if (abrupt) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h index 2bb7fbd..a2f1b78 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.h +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h @@ -56,6 +56,7 @@ #define put_ep(ep) { \ PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ ep, atomic_read(&((ep)->kref.refcount))); \ + WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ kref_put(&((ep)->kref), __free_ep); \ } diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index 8891c3b..6cd484e 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -832,6 +832,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, abort=0; disconnect = 1; ep = qhp->ep; + get_ep(&ep->com); } flush_qp(qhp, &flag); break; @@ -848,6 +849,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, abort=1; disconnect = 1; ep = qhp->ep; + get_ep(&ep->com); } goto err; break; @@ -929,8 +931,10 @@ out: * on the EP. This can be a normal close (RTS->CLOSING) or * an abnormal close (RTS/CLOSING->ERROR). */ - if (disconnect) + if (disconnect) { iwch_ep_disconnect(ep, abort, GFP_KERNEL); + put_ep(&ep->com); + } /* * If free is 1, then we've disassociated the EP from the QP -- 2.7.4