ipv4: add net_hash_mix() dispersion to fib_info_laddrhash keys
author Eric Dumazet <edumazet@google.com>
Wed, 19 Jan 2022 10:04:13 +0000 (02:04 -0800)
committer Jakub Kicinski <kuba@kernel.org>
Wed, 19 Jan 2022 16:14:40 +0000 (08:14 -0800)
net/ipv4/fib_semantics.c uses a hash table (fib_info_laddrhash)
in which fib_sync_down_addr() can locate fib_info
based on IPv4 local address.

This hash table is resized based on the total number of
hashed fib_info, but the hash function uses only
the local address.

For hosts having many active network namespaces,
all fib_info for loopback devices (IPv4 address 127.0.0.1)
are hashed into a single bucket, making netns dismantles
very slow.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/ipv4/fib_semantics.c

index 9813949..b458986 100644 (file)
@@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(fib_info_lock);
 static struct hlist_head *fib_info_hash;
 static struct hlist_head *fib_info_laddrhash;
 static unsigned int fib_info_hash_size;
+static unsigned int fib_info_hash_bits;
 static unsigned int fib_info_cnt;
 
 #define DEVINDEX_HASHBITS 8
@@ -1247,13 +1248,13 @@ int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
        return err;
 }
 
-static inline unsigned int fib_laddr_hashfn(__be32 val)
+static struct hlist_head *
+fib_info_laddrhash_bucket(const struct net *net, __be32 val)
 {
-       unsigned int mask = (fib_info_hash_size - 1);
+       u32 slot = hash_32(net_hash_mix(net) ^ (__force u32)val,
+                          fib_info_hash_bits);
 
-       return ((__force u32)val ^
-               ((__force u32)val >> 7) ^
-               ((__force u32)val >> 14)) & mask;
+       return &fib_info_laddrhash[slot];
 }
 
 static struct hlist_head *fib_info_hash_alloc(int bytes)
@@ -1289,6 +1290,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
        old_info_hash = fib_info_hash;
        old_laddrhash = fib_info_laddrhash;
        fib_info_hash_size = new_size;
+       fib_info_hash_bits = ilog2(new_size);
 
        for (i = 0; i < old_size; i++) {
                struct hlist_head *head = &fib_info_hash[i];
@@ -1306,21 +1308,20 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
        }
        fib_info_hash = new_info_hash;
 
+       fib_info_laddrhash = new_laddrhash;
        for (i = 0; i < old_size; i++) {
-               struct hlist_head *lhead = &fib_info_laddrhash[i];
+               struct hlist_head *lhead = &old_laddrhash[i];
                struct hlist_node *n;
                struct fib_info *fi;
 
                hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
                        struct hlist_head *ldest;
-                       unsigned int new_hash;
 
-                       new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
-                       ldest = &new_laddrhash[new_hash];
+                       ldest = fib_info_laddrhash_bucket(fi->fib_net,
+                                                         fi->fib_prefsrc);
                        hlist_add_head(&fi->fib_lhash, ldest);
                }
        }
-       fib_info_laddrhash = new_laddrhash;
 
        spin_unlock_bh(&fib_info_lock);
 
@@ -1605,7 +1606,7 @@ link_it:
        if (fi->fib_prefsrc) {
                struct hlist_head *head;
 
-               head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
+               head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc);
                hlist_add_head(&fi->fib_lhash, head);
        }
        if (fi->nh) {
@@ -1877,16 +1878,16 @@ nla_put_failure:
  */
 int fib_sync_down_addr(struct net_device *dev, __be32 local)
 {
-       int ret = 0;
-       unsigned int hash = fib_laddr_hashfn(local);
-       struct hlist_head *head = &fib_info_laddrhash[hash];
        int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
        struct net *net = dev_net(dev);
+       struct hlist_head *head;
        struct fib_info *fi;
+       int ret = 0;
 
        if (!fib_info_laddrhash || local == 0)
                return 0;
 
+       head = fib_info_laddrhash_bucket(net, local);
        hlist_for_each_entry(fi, head, fib_lhash) {
                if (!net_eq(fi->fib_net, net) ||
                    fi->fib_tb_id != tb_id)