RDMA/core: Add a netevent notifier to cma
authorPatrisious Haddad <phaddad@nvidia.com>
Tue, 7 Jun 2022 11:32:44 +0000 (14:32 +0300)
committerLeon Romanovsky <leonro@nvidia.com>
Thu, 16 Jun 2022 06:54:42 +0000 (09:54 +0300)
Add a netevent callback for cma, mainly to catch NETEVENT_NEIGH_UPDATE.

Previously, when a system with a failover MAC mechanism changed its MAC address
during a CM connection attempt, the RDMA-CM would take a long time before
disconnecting and timing out due to the incorrect MAC address.

Now, when we get a NETEVENT_NEIGH_UPDATE, we check whether it is due to a failover
MAC change and, if so, we instantly destroy the CM and notify the user in order
to spare the unnecessary wait for the timeout.

Link: https://lore.kernel.org/r/bb255c9e301cd50b905663b8e73f7f5133d0e4c5.1654601342.git.leonro@nvidia.com
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/core/cma.c
include/rdma/rdma_cm.h

index 0a17b1b..46d0667 100644 (file)
@@ -21,6 +21,7 @@
 
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/netevent.h>
 #include <net/tcp.h>
 #include <net/ipv6.h>
 #include <net/ip_fib.h>
@@ -5047,10 +5048,87 @@ out:
        return ret;
 }
 
+static void cma_netevent_work_handler(struct work_struct *_work)
+{
+       struct rdma_id_private *id_priv =
+               container_of(_work, struct rdma_id_private, id.net_work);
+       struct rdma_cm_event event = {};
+
+       mutex_lock(&id_priv->handler_mutex);
+
+       if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+           READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+               goto out_unlock;
+
+       event.event = RDMA_CM_EVENT_UNREACHABLE;
+       event.status = -ETIMEDOUT;
+
+       if (cma_cm_event_handler(id_priv, &event)) {
+               __acquire(&id_priv->handler_mutex);
+               id_priv->cm_id.ib = NULL;
+               cma_id_put(id_priv);
+               destroy_id_handler_unlock(id_priv);
+               return;
+       }
+
+out_unlock:
+       mutex_unlock(&id_priv->handler_mutex);
+       cma_id_put(id_priv);
+}
+
+static int cma_netevent_callback(struct notifier_block *self,
+                                unsigned long event, void *ctx)
+{
+       struct id_table_entry *ips_node = NULL;
+       struct rdma_id_private *current_id;
+       struct neighbour *neigh = ctx;
+       unsigned long flags;
+
+       if (event != NETEVENT_NEIGH_UPDATE)
+               return NOTIFY_DONE;
+
+       spin_lock_irqsave(&id_table_lock, flags);
+       if (neigh->tbl->family == AF_INET6) {
+               struct sockaddr_in6 neigh_sock_6;
+
+               neigh_sock_6.sin6_family = AF_INET6;
+               neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+               ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+                                            (struct sockaddr *)&neigh_sock_6);
+       } else if (neigh->tbl->family == AF_INET) {
+               struct sockaddr_in neigh_sock_4;
+
+               neigh_sock_4.sin_family = AF_INET;
+               neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+               ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+                                            (struct sockaddr *)&neigh_sock_4);
+       } else
+               goto out;
+
+       if (!ips_node)
+               goto out;
+
+       list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+               if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+                          neigh->ha, ETH_ALEN))
+                       continue;
+               INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
+               cma_id_get(current_id);
+               queue_work(cma_wq, &current_id->id.net_work);
+       }
+out:
+       spin_unlock_irqrestore(&id_table_lock, flags);
+       return NOTIFY_DONE;
+}
+
 static struct notifier_block cma_nb = {
        .notifier_call = cma_netdev_callback
 };
 
+static struct notifier_block cma_netevent_cb = {
+       .notifier_call = cma_netevent_callback
+};
+
 static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
 {
        struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
@@ -5273,6 +5351,7 @@ static int __init cma_init(void)
 
        ib_sa_register_client(&sa_client);
        register_netdevice_notifier(&cma_nb);
+       register_netevent_notifier(&cma_netevent_cb);
 
        ret = ib_register_client(&cma_client);
        if (ret)
@@ -5287,6 +5366,7 @@ static int __init cma_init(void)
 err_ib:
        ib_unregister_client(&cma_client);
 err:
+       unregister_netevent_notifier(&cma_netevent_cb);
        unregister_netdevice_notifier(&cma_nb);
        ib_sa_unregister_client(&sa_client);
        unregister_pernet_subsys(&cma_pernet_operations);
@@ -5299,6 +5379,7 @@ static void __exit cma_cleanup(void)
 {
        cma_configfs_exit();
        ib_unregister_client(&cma_client);
+       unregister_netevent_notifier(&cma_netevent_cb);
        unregister_netdevice_notifier(&cma_nb);
        ib_sa_unregister_client(&sa_client);
        unregister_pernet_subsys(&cma_pernet_operations);
index d989f03..5b18e2e 100644 (file)
@@ -108,6 +108,7 @@ struct rdma_cm_id {
        enum rdma_ucm_port_space ps;
        enum ib_qp_type          qp_type;
        u32                      port_num;
+       struct work_struct net_work;
 };
 
 struct rdma_cm_id *