ipv4: avoid quadratic behavior in netns dismantle
author Eric Dumazet <edumazet@google.com>
Wed, 19 Jan 2022 10:04:12 +0000 (02:04 -0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 27 Jan 2022 10:05:27 +0000 (11:05 +0100)
commit d07418afea8f1d9896aaf9dc5ae47ac4f45b220c upstream.

net/ipv4/fib_semantics.c uses a hash table of 256 slots,
keyed by device ifindexes: fib_info_devhash[DEVINDEX_HASHSIZE]

Problem is that with network namespaces, devices tend
to use the same ifindex.

lo device for instance has a fixed ifindex of one,
for all network namespaces.

This means that hosts with thousands of netns spend
a lot of time looking at some hash buckets with thousands
of elements, notably at netns dismantle.

Simply add a per netns perturbation (net_hash_mix())
to spread elements more uniformly.

Also change fib_devindex_hashfn() to use more entropy.

Fixes: aa79e66eee5d ("net: Make ifindex generation per-net namespace")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
net/ipv4/fib_semantics.c

index bf64a76..5dfb94a 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/netlink.h>
+#include <linux/hash.h>
 
 #include <net/arp.h>
 #include <net/ip.h>
@@ -319,11 +320,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
 
 static inline unsigned int fib_devindex_hashfn(unsigned int val)
 {
-       unsigned int mask = DEVINDEX_HASHSIZE - 1;
+       return hash_32(val, DEVINDEX_HASHBITS);
+}
+
+static struct hlist_head *
+fib_info_devhash_bucket(const struct net_device *dev)
+{
+       u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
 
-       return (val ^
-               (val >> DEVINDEX_HASHBITS) ^
-               (val >> (DEVINDEX_HASHBITS * 2))) & mask;
+       return &fib_info_devhash[fib_devindex_hashfn(val)];
 }
 
 static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -433,12 +438,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
 {
        struct hlist_head *head;
        struct fib_nh *nh;
-       unsigned int hash;
 
        spin_lock(&fib_info_lock);
 
-       hash = fib_devindex_hashfn(dev->ifindex);
-       head = &fib_info_devhash[hash];
+       head = fib_info_devhash_bucket(dev);
+
        hlist_for_each_entry(nh, head, nh_hash) {
                if (nh->fib_nh_dev == dev &&
                    nh->fib_nh_gw4 == gw &&
@@ -1607,12 +1611,10 @@ link_it:
        } else {
                change_nexthops(fi) {
                        struct hlist_head *head;
-                       unsigned int hash;
 
                        if (!nexthop_nh->fib_nh_dev)
                                continue;
-                       hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
-                       head = &fib_info_devhash[hash];
+                       head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
                        hlist_add_head(&nexthop_nh->nh_hash, head);
                } endfor_nexthops(fi)
        }
@@ -1964,8 +1966,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
 
 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
 {
-       unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-       struct hlist_head *head = &fib_info_devhash[hash];
+       struct hlist_head *head = fib_info_devhash_bucket(dev);
        struct fib_nh *nh;
 
        hlist_for_each_entry(nh, head, nh_hash) {
@@ -1984,12 +1985,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
  */
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
 {
-       int ret = 0;
-       int scope = RT_SCOPE_NOWHERE;
+       struct hlist_head *head = fib_info_devhash_bucket(dev);
        struct fib_info *prev_fi = NULL;
-       unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-       struct hlist_head *head = &fib_info_devhash[hash];
+       int scope = RT_SCOPE_NOWHERE;
        struct fib_nh *nh;
+       int ret = 0;
 
        if (force)
                scope = -1;
@@ -2134,7 +2134,6 @@ out:
 int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
 {
        struct fib_info *prev_fi;
-       unsigned int hash;
        struct hlist_head *head;
        struct fib_nh *nh;
        int ret;
@@ -2150,8 +2149,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
        }
 
        prev_fi = NULL;
-       hash = fib_devindex_hashfn(dev->ifindex);
-       head = &fib_info_devhash[hash];
+       head = fib_info_devhash_bucket(dev);
        ret = 0;
 
        hlist_for_each_entry(nh, head, nh_hash) {