Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
authorJakub Kicinski <kuba@kernel.org>
Sat, 28 Jan 2023 07:32:02 +0000 (23:32 -0800)
committerJakub Kicinski <kuba@kernel.org>
Sat, 28 Jan 2023 07:32:03 +0000 (23:32 -0800)
Daniel Borkmann says:

====================
bpf 2023-01-27

We've added 10 non-merge commits during the last 9 day(s) which contain
a total of 10 files changed, 170 insertions(+), 59 deletions(-).

The main changes are:

1) Fix preservation of register's parent/live fields when copying
   range-info, from Eduard Zingerman.

2) Fix an off-by-one bug in bpf_mem_cache_idx() to select the right
   cache, from Hou Tao.

3) Fix stack overflow from infinite recursion in sock_map_close(),
   from Jakub Sitnicki.

4) Fix missing btf_put() in register_btf_id_dtor_kfuncs()'s error path,
   from Jiri Olsa.

5) Fix a splat from bpf_setsockopt() via lsm_cgroup/socket_sock_rcv_skb,
   from Kui-Feng Lee.

6) Fix bpf_send_signal[_thread]() helpers to hold a reference on the task,
   from Yonghong Song.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf: Fix the kernel crash caused by bpf_setsockopt().
  selftests/bpf: Cover listener cloning with progs attached to sockmap
  selftests/bpf: Pass BPF skeleton to sockmap_listen ops tests
  bpf, sockmap: Check for any of tcp_bpf_prots when cloning a listener
  bpf, sockmap: Don't let sock_map_{close,destroy,unhash} call itself
  bpf: Add missing btf_put to register_btf_id_dtor_kfuncs
  selftests/bpf: Verify copy_register_state() preserves parent/live fields
  bpf: Fix to preserve reg parent/live fields when copying range info
  bpf: Fix a possible task gone issue with bpf_send_signal[_thread]() helpers
  bpf: Fix off-by-one error in bpf_mem_cache_idx()
====================

Link: https://lore.kernel.org/r/20230127215820.4993-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/linux/util_macros.h
kernel/bpf/bpf_lsm.c
kernel/bpf/btf.c
kernel/bpf/memalloc.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/core/sock_map.c
net/ipv4/tcp_bpf.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/verifier/search_pruning.c

index 72299f2..43db6e4 100644 (file)
  */
 #define find_closest_descending(x, a, as) __find_closest(x, a, as, >=)
 
+/**
+ * is_insidevar - check if the @ptr points inside the @var memory range.
+ * @ptr:       the pointer to a memory address.
+ * @var:       the variable which address and size identify the memory range.
+ *
+ * Evaluates to true if the address in @ptr lies within the memory
+ * range allocated to @var.
+ */
+#define is_insidevar(ptr, var)                                         \
+       ((uintptr_t)(ptr) >= (uintptr_t)(var) &&                        \
+        (uintptr_t)(ptr) <  (uintptr_t)(var) + sizeof(var))
+
 #endif
index a4a41ee..e14c822 100644 (file)
@@ -51,7 +51,6 @@ BTF_SET_END(bpf_lsm_current_hooks)
  */
 BTF_SET_START(bpf_lsm_locked_sockopt_hooks)
 #ifdef CONFIG_SECURITY_NETWORK
-BTF_ID(func, bpf_lsm_socket_sock_rcv_skb)
 BTF_ID(func, bpf_lsm_sock_graft)
 BTF_ID(func, bpf_lsm_inet_csk_clone)
 BTF_ID(func, bpf_lsm_inet_conn_established)
index f7dd8af..b7017ca 100644 (file)
@@ -7782,9 +7782,9 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
 
        sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);
 
-       return 0;
 end:
-       btf_free_dtor_kfunc_tab(btf);
+       if (ret)
+               btf_free_dtor_kfunc_tab(btf);
        btf_put(btf);
        return ret;
 }
index ebcc3dd..1db1564 100644 (file)
@@ -71,7 +71,7 @@ static int bpf_mem_cache_idx(size_t size)
        if (size <= 192)
                return size_index[(size - 1) / 8] - 1;
 
-       return fls(size - 1) - 1;
+       return fls(size - 1) - 2;
 }
 
 #define NUM_CACHES 11
index dbef0b0..7ee2188 100644 (file)
@@ -3243,13 +3243,24 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
        return reg->type != SCALAR_VALUE;
 }
 
+/* Copy src state preserving dst->parent and dst->live fields */
+static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
+{
+       struct bpf_reg_state *parent = dst->parent;
+       enum bpf_reg_liveness live = dst->live;
+
+       *dst = *src;
+       dst->parent = parent;
+       dst->live = live;
+}
+
 static void save_register_state(struct bpf_func_state *state,
                                int spi, struct bpf_reg_state *reg,
                                int size)
 {
        int i;
 
-       state->stack[spi].spilled_ptr = *reg;
+       copy_register_state(&state->stack[spi].spilled_ptr, reg);
        if (size == BPF_REG_SIZE)
                state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 
@@ -3577,7 +3588,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
                                 */
                                s32 subreg_def = state->regs[dst_regno].subreg_def;
 
-                               state->regs[dst_regno] = *reg;
+                               copy_register_state(&state->regs[dst_regno], reg);
                                state->regs[dst_regno].subreg_def = subreg_def;
                        } else {
                                for (i = 0; i < size; i++) {
@@ -3598,7 +3609,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 
                if (dst_regno >= 0) {
                        /* restore register state from stack */
-                       state->regs[dst_regno] = *reg;
+                       copy_register_state(&state->regs[dst_regno], reg);
                        /* mark reg as written since spilled pointer state likely
                         * has its liveness marks cleared by is_state_visited()
                         * which resets stack/reg liveness for state transitions
@@ -9592,7 +9603,7 @@ do_sim:
         */
        if (!ptr_is_dst_reg) {
                tmp = *dst_reg;
-               *dst_reg = *ptr_reg;
+               copy_register_state(dst_reg, ptr_reg);
        }
        ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
                                        env->insn_idx);
@@ -10845,7 +10856,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
                                         * to propagate min/max range.
                                         */
                                        src_reg->id = ++env->id_gen;
-                               *dst_reg = *src_reg;
+                               copy_register_state(dst_reg, src_reg);
                                dst_reg->live |= REG_LIVE_WRITTEN;
                                dst_reg->subreg_def = DEF_NOT_SUBREG;
                        } else {
@@ -10856,7 +10867,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
                                                insn->src_reg);
                                        return -EACCES;
                                } else if (src_reg->type == SCALAR_VALUE) {
-                                       *dst_reg = *src_reg;
+                                       copy_register_state(dst_reg, src_reg);
                                        /* Make sure ID is cleared otherwise
                                         * dst_reg min/max could be incorrectly
                                         * propagated into src_reg by find_equal_scalars()
@@ -11655,7 +11666,7 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate,
 
        bpf_for_each_reg_in_vstate(vstate, state, reg, ({
                if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
-                       *reg = *known_reg;
+                       copy_register_state(reg, known_reg);
        }));
 }
 
index f47274d..c09792c 100644 (file)
@@ -833,6 +833,7 @@ static void do_bpf_send_signal(struct irq_work *entry)
 
        work = container_of(entry, struct send_signal_irq_work, irq_work);
        group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type);
+       put_task_struct(work->task);
 }
 
 static int bpf_send_signal_common(u32 sig, enum pid_type type)
@@ -867,7 +868,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type)
                 * to the irq_work. The current task may change when queued
                 * irq works get executed.
                 */
-               work->task = current;
+               work->task = get_task_struct(current);
                work->sig = sig;
                work->type = type;
                irq_work_queue(&work->irq_work);
index 22fa2c5..a68a729 100644 (file)
@@ -1569,15 +1569,16 @@ void sock_map_unhash(struct sock *sk)
        psock = sk_psock(sk);
        if (unlikely(!psock)) {
                rcu_read_unlock();
-               if (sk->sk_prot->unhash)
-                       sk->sk_prot->unhash(sk);
-               return;
+               saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
+       } else {
+               saved_unhash = psock->saved_unhash;
+               sock_map_remove_links(sk, psock);
+               rcu_read_unlock();
        }
-
-       saved_unhash = psock->saved_unhash;
-       sock_map_remove_links(sk, psock);
-       rcu_read_unlock();
-       saved_unhash(sk);
+       if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
+               return;
+       if (saved_unhash)
+               saved_unhash(sk);
 }
 EXPORT_SYMBOL_GPL(sock_map_unhash);
 
@@ -1590,17 +1591,18 @@ void sock_map_destroy(struct sock *sk)
        psock = sk_psock_get(sk);
        if (unlikely(!psock)) {
                rcu_read_unlock();
-               if (sk->sk_prot->destroy)
-                       sk->sk_prot->destroy(sk);
-               return;
+               saved_destroy = READ_ONCE(sk->sk_prot)->destroy;
+       } else {
+               saved_destroy = psock->saved_destroy;
+               sock_map_remove_links(sk, psock);
+               rcu_read_unlock();
+               sk_psock_stop(psock);
+               sk_psock_put(sk, psock);
        }
-
-       saved_destroy = psock->saved_destroy;
-       sock_map_remove_links(sk, psock);
-       rcu_read_unlock();
-       sk_psock_stop(psock);
-       sk_psock_put(sk, psock);
-       saved_destroy(sk);
+       if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
+               return;
+       if (saved_destroy)
+               saved_destroy(sk);
 }
 EXPORT_SYMBOL_GPL(sock_map_destroy);
 
@@ -1615,16 +1617,21 @@ void sock_map_close(struct sock *sk, long timeout)
        if (unlikely(!psock)) {
                rcu_read_unlock();
                release_sock(sk);
-               return sk->sk_prot->close(sk, timeout);
+               saved_close = READ_ONCE(sk->sk_prot)->close;
+       } else {
+               saved_close = psock->saved_close;
+               sock_map_remove_links(sk, psock);
+               rcu_read_unlock();
+               sk_psock_stop(psock);
+               release_sock(sk);
+               cancel_work_sync(&psock->work);
+               sk_psock_put(sk, psock);
        }
-
-       saved_close = psock->saved_close;
-       sock_map_remove_links(sk, psock);
-       rcu_read_unlock();
-       sk_psock_stop(psock);
-       release_sock(sk);
-       cancel_work_sync(&psock->work);
-       sk_psock_put(sk, psock);
+       /* Make sure we do not recurse. This is a bug.
+        * Leak the socket instead of crashing on a stack overflow.
+        */
+       if (WARN_ON_ONCE(saved_close == sock_map_close))
+               return;
        saved_close(sk, timeout);
 }
 EXPORT_SYMBOL_GPL(sock_map_close);
index 94aad38..cf26d65 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/bpf.h>
 #include <linux/init.h>
 #include <linux/wait.h>
+#include <linux/util_macros.h>
 
 #include <net/inet_common.h>
 #include <net/tls.h>
@@ -639,10 +640,9 @@ EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
  */
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
 {
-       int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
        struct proto *prot = newsk->sk_prot;
 
-       if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
+       if (is_insidevar(prot, tcp_bpf_prots))
                newsk->sk_prot = sk->sk_prot_creator;
 }
 #endif /* CONFIG_BPF_SYSCALL */
index 2cf0c7a..567e07c 100644 (file)
@@ -30,6 +30,8 @@
 #define MAX_STRERR_LEN 256
 #define MAX_TEST_NAME 80
 
+#define __always_unused        __attribute__((__unused__))
+
 #define _FAIL(errnum, fmt...)                                                  \
        ({                                                                     \
                error_at_line(0, (errnum), __func__, __LINE__, fmt);           \
@@ -321,7 +323,8 @@ static int socket_loopback(int family, int sotype)
        return socket_loopback_reuseport(family, sotype, -1);
 }
 
-static void test_insert_invalid(int family, int sotype, int mapfd)
+static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
+                               int family, int sotype, int mapfd)
 {
        u32 key = 0;
        u64 value;
@@ -338,7 +341,8 @@ static void test_insert_invalid(int family, int sotype, int mapfd)
                FAIL_ERRNO("map_update: expected EBADF");
 }
 
-static void test_insert_opened(int family, int sotype, int mapfd)
+static void test_insert_opened(struct test_sockmap_listen *skel __always_unused,
+                              int family, int sotype, int mapfd)
 {
        u32 key = 0;
        u64 value;
@@ -359,7 +363,8 @@ static void test_insert_opened(int family, int sotype, int mapfd)
        xclose(s);
 }
 
-static void test_insert_bound(int family, int sotype, int mapfd)
+static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
+                             int family, int sotype, int mapfd)
 {
        struct sockaddr_storage addr;
        socklen_t len;
@@ -386,7 +391,8 @@ close:
        xclose(s);
 }
 
-static void test_insert(int family, int sotype, int mapfd)
+static void test_insert(struct test_sockmap_listen *skel __always_unused,
+                       int family, int sotype, int mapfd)
 {
        u64 value;
        u32 key;
@@ -402,7 +408,8 @@ static void test_insert(int family, int sotype, int mapfd)
        xclose(s);
 }
 
-static void test_delete_after_insert(int family, int sotype, int mapfd)
+static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused,
+                                    int family, int sotype, int mapfd)
 {
        u64 value;
        u32 key;
@@ -419,7 +426,8 @@ static void test_delete_after_insert(int family, int sotype, int mapfd)
        xclose(s);
 }
 
-static void test_delete_after_close(int family, int sotype, int mapfd)
+static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused,
+                                   int family, int sotype, int mapfd)
 {
        int err, s;
        u64 value;
@@ -442,7 +450,8 @@ static void test_delete_after_close(int family, int sotype, int mapfd)
                FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
 }
 
-static void test_lookup_after_insert(int family, int sotype, int mapfd)
+static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused,
+                                    int family, int sotype, int mapfd)
 {
        u64 cookie, value;
        socklen_t len;
@@ -470,7 +479,8 @@ static void test_lookup_after_insert(int family, int sotype, int mapfd)
        xclose(s);
 }
 
-static void test_lookup_after_delete(int family, int sotype, int mapfd)
+static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused,
+                                    int family, int sotype, int mapfd)
 {
        int err, s;
        u64 value;
@@ -493,7 +503,8 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd)
        xclose(s);
 }
 
-static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
+static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused,
+                                    int family, int sotype, int mapfd)
 {
        u32 key, value32;
        int err, s;
@@ -523,7 +534,8 @@ close:
        xclose(s);
 }
 
-static void test_update_existing(int family, int sotype, int mapfd)
+static void test_update_existing(struct test_sockmap_listen *skel __always_unused,
+                                int family, int sotype, int mapfd)
 {
        int s1, s2;
        u64 value;
@@ -551,7 +563,7 @@ close_s1:
 /* Exercise the code path where we destroy child sockets that never
  * got accept()'ed, aka orphans, when parent socket gets closed.
  */
-static void test_destroy_orphan_child(int family, int sotype, int mapfd)
+static void do_destroy_orphan_child(int family, int sotype, int mapfd)
 {
        struct sockaddr_storage addr;
        socklen_t len;
@@ -582,10 +594,38 @@ close_srv:
        xclose(s);
 }
 
+static void test_destroy_orphan_child(struct test_sockmap_listen *skel,
+                                     int family, int sotype, int mapfd)
+{
+       int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+       int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+       const struct test {
+               int progfd;
+               enum bpf_attach_type atype;
+       } tests[] = {
+               { -1, -1 },
+               { msg_verdict, BPF_SK_MSG_VERDICT },
+               { skb_verdict, BPF_SK_SKB_VERDICT },
+       };
+       const struct test *t;
+
+       for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+               if (t->progfd != -1 &&
+                   xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0)
+                       return;
+
+               do_destroy_orphan_child(family, sotype, mapfd);
+
+               if (t->progfd != -1)
+                       xbpf_prog_detach2(t->progfd, mapfd, t->atype);
+       }
+}
+
 /* Perform a passive open after removing listening socket from SOCKMAP
  * to ensure that callbacks get restored properly.
  */
-static void test_clone_after_delete(int family, int sotype, int mapfd)
+static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
+                                   int family, int sotype, int mapfd)
 {
        struct sockaddr_storage addr;
        socklen_t len;
@@ -621,7 +661,8 @@ close_srv:
  * SOCKMAP, but got accept()'ed only after the parent has been removed
  * from SOCKMAP, gets cloned without parent psock state or callbacks.
  */
-static void test_accept_after_delete(int family, int sotype, int mapfd)
+static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused,
+                                    int family, int sotype, int mapfd)
 {
        struct sockaddr_storage addr;
        const u32 zero = 0;
@@ -675,7 +716,8 @@ close_srv:
 /* Check that child socket that got created and accepted while parent
  * was in a SOCKMAP is cloned without parent psock state or callbacks.
  */
-static void test_accept_before_delete(int family, int sotype, int mapfd)
+static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused,
+                                     int family, int sotype, int mapfd)
 {
        struct sockaddr_storage addr;
        const u32 zero = 0, one = 1;
@@ -784,7 +826,8 @@ done:
        return NULL;
 }
 
-static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
+static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused,
+                                       int family, int sotype, int mapfd)
 {
        struct connect_accept_ctx ctx = { 0 };
        struct sockaddr_storage addr;
@@ -847,7 +890,8 @@ static void *listen_thread(void *arg)
        return NULL;
 }
 
-static void test_race_insert_listen(int family, int socktype, int mapfd)
+static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused,
+                                   int family, int socktype, int mapfd)
 {
        struct connect_accept_ctx ctx = { 0 };
        const u32 zero = 0;
@@ -1473,7 +1517,8 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
                     int family, int sotype)
 {
        const struct op_test {
-               void (*fn)(int family, int sotype, int mapfd);
+               void (*fn)(struct test_sockmap_listen *skel,
+                          int family, int sotype, int mapfd);
                const char *name;
                int sotype;
        } tests[] = {
@@ -1520,7 +1565,7 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
                if (!test__start_subtest(s))
                        continue;
 
-               t->fn(family, sotype, map_fd);
+               t->fn(skel, family, sotype, map_fd);
                test_ops_cleanup(map);
        }
 }
index 68b14fd..d63fd89 100644 (file)
        .result_unpriv = ACCEPT,
        .insn_processed = 15,
 },
+/* The test performs a conditional 64-bit write to a stack location
+ * fp[-8], this is followed by an unconditional 8-bit write to fp[-8],
+ * then data is read from fp[-8]. This sequence is unsafe.
+ *
+ * The test would be mistakenly marked as safe w/o dst register parent
+ * preservation in verifier.c:copy_register_state() function.
+ *
+ * Note the usage of BPF_F_TEST_STATE_FREQ to force creation of the
+ * checkpoint state after conditional 64-bit assignment.
+ */
+{
+       "write tracking and register parent chain bug",
+       .insns = {
+       /* r6 = ktime_get_ns() */
+       BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+       /* r0 = ktime_get_ns() */
+       BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+       /* if r0 > r6 goto +1 */
+       BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_6, 1),
+       /* *(u64 *)(r10 - 8) = 0xdeadbeef */
+       BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0xdeadbeef),
+       /* r1 = 42 */
+       BPF_MOV64_IMM(BPF_REG_1, 42),
+       /* *(u8 *)(r10 - 8) = r1 */
+       BPF_STX_MEM(BPF_B, BPF_REG_FP, BPF_REG_1, -8),
+       /* r2 = *(u64 *)(r10 - 8) */
+       BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_FP, -8),
+       /* exit(0) */
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .flags = BPF_F_TEST_STATE_FREQ,
+       .errstr = "invalid read from stack off -8+1 size 8",
+       .result = REJECT,
+},