staging: lustre: o2iblnd: fix race at kiblnd_connect_peer
author Doug Oucharek <dougso@me.com>
Wed, 2 May 2018 05:22:19 +0000 (22:22 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 3 Aug 2018 05:50:36 +0000 (07:50 +0200)
[ Upstream commit cf04968efe341b9b1c30a527e5dd61b2af9c43d2 ]

The cmid is destroyed by OFED if kiblnd_cm_callback returns an error.
If that error happens before kiblnd_connect_peer has finished, the latter
touches the already-destroyed cmid and fails with:
(o2iblnd_cb.c:1315:kiblnd_connect_peer())
            ASSERTION( cmid->device != ((void *)0) ) failed:

Signed-off-by: Alexander Boyko <alexander.boyko@seagate.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-10015
Reviewed-by: Alexey Lyashkov <c17817@cray.com>
Reviewed-by: Doug Oucharek <dougso@me.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Signed-off-by: Doug Oucharek <dougso@me.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c

index 29e1002..4a9b733 100644 (file)
@@ -1289,11 +1289,6 @@ kiblnd_connect_peer(struct kib_peer *peer)
                goto failed2;
        }
 
-       LASSERT(cmid->device);
-       CDEBUG(D_NET, "%s: connection bound to %s:%pI4h:%s\n",
-              libcfs_nid2str(peer->ibp_nid), dev->ibd_ifname,
-              &dev->ibd_ifip, cmid->device->name);
-
        return;
 
  failed2:
@@ -2995,8 +2990,19 @@ kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event)
                } else {
                        rc = rdma_resolve_route(
                                cmid, *kiblnd_tunables.kib_timeout * 1000);
-                       if (!rc)
+                       if (!rc) {
+                               struct kib_net *net = peer->ibp_ni->ni_data;
+                               struct kib_dev *dev = net->ibn_dev;
+
+                               CDEBUG(D_NET, "%s: connection bound to "\
+                                      "%s:%pI4h:%s\n",
+                                      libcfs_nid2str(peer->ibp_nid),
+                                      dev->ibd_ifname,
+                                      &dev->ibd_ifip, cmid->device->name);
+
                                return 0;
+                       }
+
                        /* Can't initiate route resolution */
                        CERROR("Can't resolve route for %s: %d\n",
                               libcfs_nid2str(peer->ibp_nid), rc);