Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
author    Jakub Kicinski <kuba@kernel.org>    Mon, 24 Oct 2022 17:32:00 +0000 (10:32 -0700)
committer Jakub Kicinski <kuba@kernel.org>    Mon, 24 Oct 2022 17:32:01 +0000 (10:32 -0700)
Alexei Starovoitov says:

====================
pull-request: bpf 2022-10-23

We've added 7 non-merge commits during the last 18 day(s) which contain
a total of 8 files changed, 69 insertions(+), 5 deletions(-).

The main changes are:

1) Wait for busy refill_work when destroying bpf memory allocator, from Hou.

2) Allow bpf_user_ringbuf_drain() callbacks to return 1, from David.

3) Fix dispatcher patchable function entry to 5 bytes nop, from Jiri.

4) Prevent decl_tag from being referenced in func_proto, from Stanislav.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Use __llist_del_all() whenever possible during memory draining
  bpf: Wait for busy refill_work when destroying bpf memory allocator
  bpf: Fix dispatcher patchable function entry to 5 bytes nop
  bpf: prevent decl_tag from being referenced in func_proto
  selftests/bpf: Add reproducer for decl_tag in func_proto return type
  selftests/bpf: Make bpf_user_ringbuf_drain() selftest callback return 1
  bpf: Allow bpf_user_ringbuf_drain() callbacks to return 1
====================

Link: https://lore.kernel.org/r/20221023192244.81137-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
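
[Editor's note] For the dispatcher fix (change 3 above), the new BPF_DISPATCHER_INIT_CALL() macro in the include/linux/bpf.h hunk below makes every DEFINE_BPF_DISPATCHER() site register an early initcall that rewrites the dispatcher's patchable function entry into the single 5-byte nop that bpf_arch_text_poke() later expects. As a rough sketch (not literal kernel source), for the XDP dispatcher defined via DEFINE_BPF_DISPATCHER(xdp) in net/core/filter.c the macro expands to roughly:

/* Approximate expansion of BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_xdp);
 * illustration only, the real code is generated by the macro added in the
 * include/linux/bpf.h hunk of this series.
 */
static int __init bpf_dispatcher_xdp_init(void)
{
	/* Normalize the patchable function entry of the dispatcher stub to
	 * the canonical 5-byte nop before any BPF program can be attached.
	 */
	return bpf_arch_init_dispatcher_early(bpf_dispatcher_xdp_func);
}
early_initcall(bpf_dispatcher_xdp_init);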
arch/x86/net/bpf_jit_comp.c
include/linux/bpf.h
kernel/bpf/btf.c
kernel/bpf/dispatcher.c
kernel/bpf/memalloc.c
kernel/bpf/verifier.c
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/progs/user_ringbuf_success.c

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 9962042..00127ab 100644
@@ -11,6 +11,7 @@
 #include <linux/bpf.h>
 #include <linux/memory.h>
 #include <linux/sort.h>
+#include <linux/init.h>
 #include <asm/extable.h>
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
@@ -388,6 +389,18 @@ out:
        return ret;
 }
 
+int __init bpf_arch_init_dispatcher_early(void *ip)
+{
+       const u8 *nop_insn = x86_nops[5];
+
+       if (is_endbr(*(u32 *)ip))
+               ip += ENDBR_INSN_SIZE;
+
+       if (memcmp(ip, nop_insn, X86_PATCH_SIZE))
+               text_poke_early(ip, nop_insn, X86_PATCH_SIZE);
+       return 0;
+}
+
 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
                       void *old_addr, void *new_addr)
 {
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9e7d46d..0566705 100644
@@ -27,6 +27,7 @@
 #include <linux/bpfptr.h>
 #include <linux/btf.h>
 #include <linux/rcupdate_trace.h>
+#include <linux/init.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -970,6 +971,8 @@ struct bpf_trampoline *bpf_trampoline_get(u64 key,
                                          struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
 int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs);
+int __init bpf_arch_init_dispatcher_early(void *ip);
+
 #define BPF_DISPATCHER_INIT(_name) {                           \
        .mutex = __MUTEX_INITIALIZER(_name.mutex),              \
        .func = &_name##_func,                                  \
@@ -983,6 +986,13 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
        },                                                      \
 }
 
+#define BPF_DISPATCHER_INIT_CALL(_name)                                        \
+       static int __init _name##_init(void)                            \
+       {                                                               \
+               return bpf_arch_init_dispatcher_early(_name##_func);    \
+       }                                                               \
+       early_initcall(_name##_init)
+
 #ifdef CONFIG_X86_64
 #define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5)))
 #else
@@ -1000,7 +1010,9 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
        }                                                               \
        EXPORT_SYMBOL(bpf_dispatcher_##name##_func);                    \
        struct bpf_dispatcher bpf_dispatcher_##name =                   \
-               BPF_DISPATCHER_INIT(bpf_dispatcher_##name);
+               BPF_DISPATCHER_INIT(bpf_dispatcher_##name);             \
+       BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_##name);
+
 #define DECLARE_BPF_DISPATCHER(name)                                   \
        unsigned int bpf_dispatcher_##name##_func(                      \
                const void *ctx,                                        \
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index eba603c..35c07af 100644
@@ -4436,6 +4436,11 @@ static int btf_func_proto_check(struct btf_verifier_env *env,
                        return -EINVAL;
                }
 
+               if (btf_type_is_resolve_source_only(ret_type)) {
+                       btf_verifier_log_type(env, t, "Invalid return type");
+                       return -EINVAL;
+               }
+
                if (btf_type_needs_resolve(ret_type) &&
                    !env_type_is_resolved(env, ret_type_id)) {
                        err = btf_resolve(env, ret_type, ret_type_id);
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index fa64b80..04f0a04 100644
@@ -4,6 +4,7 @@
 #include <linux/hash.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
+#include <linux/init.h>
 
 /* The BPF dispatcher is a multiway branch code generator. The
  * dispatcher is a mechanism to avoid the performance penalty of an
@@ -90,6 +91,11 @@ int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int n
        return -ENOTSUPP;
 }
 
+int __weak __init bpf_arch_init_dispatcher_early(void *ip)
+{
+       return -ENOTSUPP;
+}
+
 static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf)
 {
        s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0];
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 5f83be1..4901fa1 100644
@@ -418,14 +418,17 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
        /* No progs are using this bpf_mem_cache, but htab_map_free() called
         * bpf_mem_cache_free() for all remaining elements and they can be in
         * free_by_rcu or in waiting_for_gp lists, so drain those lists now.
+        *
+        * Except for the waiting_for_gp list, there are no concurrent
+        * operations on these lists, so it is safe to use __llist_del_all().
         */
        llist_for_each_safe(llnode, t, __llist_del_all(&c->free_by_rcu))
                free_one(c, llnode);
        llist_for_each_safe(llnode, t, llist_del_all(&c->waiting_for_gp))
                free_one(c, llnode);
-       llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist))
+       llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist))
                free_one(c, llnode);
-       llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra))
+       llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist_extra))
                free_one(c, llnode);
 }
 
@@ -493,6 +496,16 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
                rcu_in_progress = 0;
                for_each_possible_cpu(cpu) {
                        c = per_cpu_ptr(ma->cache, cpu);
+                       /*
+                        * refill_work may be unfinished on a PREEMPT_RT
+                        * kernel, where irq work runs in a per-CPU RT
+                        * thread, or on a kernel where
+                        * arch_irq_work_has_interrupt() is false and irq
+                        * work runs from the timer interrupt. So wait for
+                        * the irq work to complete to ease the handling
+                        * of concurrency.
+                        */
+                       irq_work_sync(&c->refill_work);
                        drain_mem_cache(c);
                        rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
                }
@@ -507,6 +520,7 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
                        cc = per_cpu_ptr(ma->caches, cpu);
                        for (i = 0; i < NUM_CACHES; i++) {
                                c = &cc->cache[i];
+                               irq_work_sync(&c->refill_work);
                                drain_mem_cache(c);
                                rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
                        }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 014ee09..7f0a9f6 100644
@@ -6946,6 +6946,7 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
        __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 
        callee->in_callback_fn = true;
+       callee->callback_ret_range = tnum_range(0, 1);
        return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 127b8ca..24dd621 100644
@@ -3936,6 +3936,19 @@ static struct btf_raw_test raw_tests[] = {
        .err_str = "Invalid type_id",
 },
 {
+       .descr = "decl_tag test #16, func proto, return type",
+       .raw_types = {
+               BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),                          /* [1] */
+               BTF_VAR_ENC(NAME_TBD, 1, 0),                                            /* [2] */
+               BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), 2), (-1), /* [3] */
+               BTF_FUNC_PROTO_ENC(3, 0),                                               /* [4] */
+               BTF_END_RAW,
+       },
+       BTF_STR_SEC("\0local\0tag1"),
+       .btf_load_err = true,
+       .err_str = "Invalid return type",
+},
+{
        .descr = "type_tag test #1",
        .raw_types = {
                BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
index 099c23d..b39093d 100644
@@ -47,14 +47,14 @@ record_sample(struct bpf_dynptr *dynptr, void *context)
                if (status) {
                        bpf_printk("bpf_dynptr_read() failed: %d\n", status);
                        err = 1;
-                       return 0;
+                       return 1;
                }
        } else {
                sample = bpf_dynptr_data(dynptr, 0, sizeof(*sample));
                if (!sample) {
                        bpf_printk("Unexpectedly failed to get sample\n");
                        err = 2;
-                       return 0;
+                       return 1;
                }
                stack_sample = *sample;
        }