RDMA/cm: Make sure the cm_id is in the IB_CM_IDLE state in destroy
author		Jason Gunthorpe <jgg@mellanox.com>
		Tue, 10 Mar 2020 09:25:45 +0000 (11:25 +0200)
committer	Jason Gunthorpe <jgg@mellanox.com>
		Tue, 17 Mar 2020 20:05:54 +0000 (17:05 -0300)
The first switch statement in cm_destroy_id() tries to move the ID to
either IB_CM_IDLE or IB_CM_TIMEWAIT. Both states will block concurrent
MAD handlers from progressing.

Previous patches removed the unreliable lock/unlock sequences in this
flow. This patch removes the extra locking steps and adds the missing
parts to guarantee that destroy reaches IB_CM_IDLE. There is no point in
leaving the ID in the IB_CM_TIMEWAIT state when the memory is about to be
kfreed.

Rework things to hold the lock across all the state transitions and
directly assert when done that it ended up in IB_CM_IDLE as expected.

This was accompanied by a careful audit of all the state transitions here,
which generally did end up in IDLE on their success and non-racy paths.
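
For illustration only, a minimal userspace sketch of the pattern adopted
here: hold a single lock across every state transition, loop back
("retest") whenever a step leaves the object in an intermediate state,
and assert the terminal state before teardown. The names (obj_state,
obj_destroy, OBJ_*) are made up for the sketch and are not cm.c symbols:

	#include <assert.h>
	#include <pthread.h>

	enum obj_state { OBJ_ACTIVE, OBJ_DRAINING, OBJ_IDLE };

	struct obj {
		pthread_mutex_t lock;
		enum obj_state state;
	};

	static void obj_destroy(struct obj *o)
	{
		pthread_mutex_lock(&o->lock);
	retest:
		switch (o->state) {
		case OBJ_ACTIVE:
			/* Transition under the lock, then re-evaluate. */
			o->state = OBJ_DRAINING;
			goto retest;
		case OBJ_DRAINING:
			o->state = OBJ_IDLE;
			goto retest;
		case OBJ_IDLE:
			break;
		}
		/* Mirrors the WARN_ON(cm_id->state != IB_CM_IDLE) in the patch. */
		assert(o->state == OBJ_IDLE);
		pthread_mutex_unlock(&o->lock);
	}

	int main(void)
	{
		struct obj o = { PTHREAD_MUTEX_INITIALIZER, OBJ_ACTIVE };
		obj_destroy(&o);
		return 0;
	}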

Link: https://lore.kernel.org/r/20200310092545.251365-16-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
drivers/infiniband/core/cm.c

index 9505b9e..4794113 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1026,34 +1026,34 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
        struct cm_work *work;
 
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-retest:
        spin_lock_irq(&cm_id_priv->lock);
+retest:
        switch (cm_id->state) {
        case IB_CM_LISTEN:
-               spin_unlock_irq(&cm_id_priv->lock);
-
-               spin_lock_irq(&cm.lock);
+               spin_lock(&cm.lock);
                if (--cm_id_priv->listen_sharecount > 0) {
                        /* The id is still shared. */
                        WARN_ON(refcount_read(&cm_id_priv->refcount) == 1);
+                       spin_unlock(&cm.lock);
+                       spin_unlock_irq(&cm_id_priv->lock);
                        cm_deref_id(cm_id_priv);
-                       spin_unlock_irq(&cm.lock);
                        return;
                }
+               cm_id->state = IB_CM_IDLE;
                rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
                RB_CLEAR_NODE(&cm_id_priv->service_node);
-               spin_unlock_irq(&cm.lock);
+               spin_unlock(&cm.lock);
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id->state = IB_CM_IDLE;
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-               spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_SIDR_REQ_RCVD:
                cm_send_sidr_rep_locked(cm_id_priv,
                                        &(struct ib_cm_sidr_rep_param){
                                                .status = IB_SIDR_REJECT });
-               spin_unlock_irq(&cm_id_priv->lock);
+               /* cm_send_sidr_rep_locked will not move to IDLE if it fails */
+               cm_id->state = IB_CM_IDLE;
                break;
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
@@ -1062,18 +1062,15 @@ retest:
                                   &cm_id_priv->id.device->node_guid,
                                   sizeof(cm_id_priv->id.device->node_guid),
                                   NULL, 0);
-               spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_REQ_RCVD:
                if (err == -ENOMEM) {
                        /* Do not reject to allow future retries. */
                        cm_reset_to_idle(cm_id_priv);
-                       spin_unlock_irq(&cm_id_priv->lock);
                } else {
                        cm_send_rej_locked(cm_id_priv,
                                           IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
                                           NULL, 0);
-                       spin_unlock_irq(&cm_id_priv->lock);
                }
                break;
        case IB_CM_REP_SENT:
@@ -1085,31 +1082,35 @@ retest:
        case IB_CM_MRA_REP_SENT:
                cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
                                   0, NULL, 0);
-               spin_unlock_irq(&cm_id_priv->lock);
                break;
        case IB_CM_ESTABLISHED:
                if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
-                       spin_unlock_irq(&cm_id_priv->lock);
+                       cm_id->state = IB_CM_IDLE;
                        break;
                }
                cm_send_dreq_locked(cm_id_priv, NULL, 0);
-               spin_unlock_irq(&cm_id_priv->lock);
                goto retest;
        case IB_CM_DREQ_SENT:
                ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
                cm_enter_timewait(cm_id_priv);
-               spin_unlock_irq(&cm_id_priv->lock);
-               break;
+               goto retest;
        case IB_CM_DREQ_RCVD:
                cm_send_drep_locked(cm_id_priv, NULL, 0);
-               spin_unlock_irq(&cm_id_priv->lock);
+               WARN_ON(cm_id->state != IB_CM_TIMEWAIT);
+               goto retest;
+       case IB_CM_TIMEWAIT:
+               /*
+                * The cm_acquire_id in cm_timewait_handler will stop working
+                * once we do cm_free_id() below, so just move to idle here for
+                * consistency.
+                */
+               cm_id->state = IB_CM_IDLE;
                break;
-       default:
-               spin_unlock_irq(&cm_id_priv->lock);
+       case IB_CM_IDLE:
                break;
        }
+       WARN_ON(cm_id->state != IB_CM_IDLE);
 
-       spin_lock_irq(&cm_id_priv->lock);
        spin_lock(&cm.lock);
        /* Required for cleanup paths related cm_req_handler() */
        if (cm_id_priv->timewait_info) {