From b6e811383062f88212082714db849127fa95142c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 21 Jun 2022 10:19:10 -0700 Subject: [PATCH] af_unix: Define a per-netns hash table. This commit adds a per-netns hash table for AF_UNIX, whose size is fixed at UNIX_HASH_SIZE for now. The first implementation defined the per-netns hash table as a single array of lock/list pairs: struct unix_hashbucket { spinlock_t lock; struct hlist_head head; }; struct netns_unix { struct unix_hashbucket *hash; ... }; However, Eric pointed out the memory cost of this layout: the structure has holes because sizeof(spinlock_t) is 4 (or more if LOCKDEP is enabled). [0] It could be expensive on a host with thousands of netns and few AF_UNIX sockets. For this reason, the per-netns hash table uses two dense arrays instead. struct unix_table { spinlock_t *locks; struct hlist_head *buckets; }; struct netns_unix { struct unix_table table; ... }; Note that the length of the list has a more significant impact than lock contention, so having shared locks could be an option. However, per-netns locks and lists still perform better than global locks with per-netns lists. [1] Also, this patch removes struct netns_unix from struct net when CONFIG_UNIX is disabled. [0]: https://lore.kernel.org/netdev/CANn89iLVxO5aqx16azNU7p7Z-nz5NrnM5QTqOzueVxEnkVTxyg@mail.gmail.com/ [1]: https://lore.kernel.org/netdev/20220617175215.1769-1-kuniyu@amazon.com/ Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. 
Miller --- include/net/net_namespace.h | 2 ++ include/net/netns/unix.h | 6 ++++++ net/unix/af_unix.c | 38 ++++++++++++++++++++++++++++++++------ 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index c4f5601..20a2992 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -120,7 +120,9 @@ struct net { struct netns_core core; struct netns_mib mib; struct netns_packet packet; +#if IS_ENABLED(CONFIG_UNIX) struct netns_unix unx; +#endif struct netns_nexthop nexthop; struct netns_ipv4 ipv4; #if IS_ENABLED(CONFIG_IPV6) diff --git a/include/net/netns/unix.h b/include/net/netns/unix.h index 91a3d7e..6f1a33d 100644 --- a/include/net/netns/unix.h +++ b/include/net/netns/unix.h @@ -5,8 +5,14 @@ #ifndef __NETNS_UNIX_H__ #define __NETNS_UNIX_H__ +struct unix_table { + spinlock_t *locks; + struct hlist_head *buckets; +}; + struct ctl_table_header; struct netns_unix { + struct unix_table table; int sysctl_max_dgram_qlen; struct ctl_table_header *ctl; }; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c0804ae..cdd1288 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = { static int __net_init unix_net_init(struct net *net) { - int error = -ENOMEM; + int i; net->unx.sysctl_max_dgram_qlen = 10; if (unix_sysctl_register(net)) @@ -3567,18 +3567,44 @@ static int __net_init unix_net_init(struct net *net) #ifdef CONFIG_PROC_FS if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops, - sizeof(struct seq_net_private))) { - unix_sysctl_unregister(net); - goto out; + sizeof(struct seq_net_private))) + goto err_sysctl; +#endif + + net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE, + sizeof(spinlock_t), GFP_KERNEL); + if (!net->unx.table.locks) + goto err_proc; + + net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!net->unx.table.buckets) + goto 
free_locks; + + for (i = 0; i < UNIX_HASH_SIZE; i++) { + spin_lock_init(&net->unx.table.locks[i]); + INIT_HLIST_HEAD(&net->unx.table.buckets[i]); } + + return 0; + +free_locks: + kvfree(net->unx.table.locks); +err_proc: +#ifdef CONFIG_PROC_FS + remove_proc_entry("unix", net->proc_net); +err_sysctl: #endif - error = 0; + unix_sysctl_unregister(net); out: - return error; + return -ENOMEM; } static void __net_exit unix_net_exit(struct net *net) { + kvfree(net->unx.table.buckets); + kvfree(net->unx.table.locks); unix_sysctl_unregister(net); remove_proc_entry("unix", net->proc_net); } -- 2.7.4