bpf: Optimize call_rcu in non-preallocated hash map.
authorAlexei Starovoitov <ast@kernel.org>
Fri, 2 Sep 2022 21:10:49 +0000 (14:10 -0700)
committerDaniel Borkmann <daniel@iogearbox.net>
Mon, 5 Sep 2022 13:33:06 +0000 (15:33 +0200)
Doing call_rcu() a million times a second becomes a bottleneck.
Convert non-preallocated hash map from call_rcu to SLAB_TYPESAFE_BY_RCU.
The RCU critical section is no longer observed for each individual htab element,
which makes the non-preallocated hash map behave just like the preallocated hash map.
The map elements are released back to kernel memory after an
RCU grace period is observed.
This improves 'map_perf_test 4' performance from 100k events per second
to 250k events per second.

bpf_mem_alloc + percpu_counter + typesafe_by_rcu provide a 10x performance
boost to the non-preallocated hash map and make it within a few % of the
preallocated map while consuming a fraction of the memory.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220902211058.60789-8-alexei.starovoitov@gmail.com
kernel/bpf/hashtab.c
kernel/bpf/memalloc.c
tools/testing/selftests/bpf/progs/timer.c

index 36aa16d..0d888a9 100644 (file)
@@ -953,8 +953,12 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
                __pcpu_freelist_push(&htab->freelist, &l->fnode);
        } else {
                dec_elem_count(htab);
-               l->htab = htab;
-               call_rcu(&l->rcu, htab_elem_free_rcu);
+               if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+                       l->htab = htab;
+                       call_rcu(&l->rcu, htab_elem_free_rcu);
+               } else {
+                       htab_elem_free(htab, l);
+               }
        }
 }
 
index 1c46763..da0721f 100644 (file)
@@ -281,7 +281,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size)
                        return -ENOMEM;
                size += LLIST_NODE_SZ; /* room for llist_node */
                snprintf(buf, sizeof(buf), "bpf-%u", size);
-               kmem_cache = kmem_cache_create(buf, size, 8, 0, NULL);
+               kmem_cache = kmem_cache_create(buf, size, 8, SLAB_TYPESAFE_BY_RCU, NULL);
                if (!kmem_cache) {
                        free_percpu(pc);
                        return -ENOMEM;
index 5f53097..0053c54 100644 (file)
@@ -208,17 +208,6 @@ static int timer_cb2(void *map, int *key, struct hmap_elem *val)
                 */
                bpf_map_delete_elem(map, key);
 
-               /* in non-preallocated hashmap both 'key' and 'val' are RCU
-                * protected and still valid though this element was deleted
-                * from the map. Arm this timer for ~35 seconds. When callback
-                * finishes the call_rcu will invoke:
-                * htab_elem_free_rcu
-                *   check_and_free_timer
-                *     bpf_timer_cancel_and_free
-                * to cancel this 35 second sleep and delete the timer for real.
-                */
-               if (bpf_timer_start(&val->timer, 1ull << 35, 0) != 0)
-                       err |= 256;
                ok |= 4;
        }
        return 0;