bpf: add bpf_link support for BPF_NETFILTER programs
author Florian Westphal <fw@strlen.de>
Fri, 21 Apr 2023 17:02:54 +0000 (19:02 +0200)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 21 Apr 2023 18:34:14 +0000 (11:34 -0700)
Add bpf_link support skeleton.  To keep this reviewable, no bpf program
can be invoked yet: if a program is attached, only a C stub is called,
not the actual bpf program.

Defaults to 'y' if both netfilter and bpf syscall are enabled in kconfig.

Uapi example usage:
union bpf_attr attr = { };

attr.link_create.prog_fd = progfd;
attr.link_create.attach_type = 0; /* unused */
attr.link_create.netfilter.pf = PF_INET;
attr.link_create.netfilter.hooknum = NF_INET_LOCAL_IN;
attr.link_create.netfilter.priority = -128;

err = bpf(BPF_LINK_CREATE, &attr, sizeof(attr));

... this would attach progfd to ipv4:input hook.

Such a hook is removed automatically when the calling program exits.

BPF_NETFILTER program invocation is added in followup change.

The NF_HOOK_OP_BPF enum will eventually be read from nfnetlink_hook.  It
allows telling userspace which program is attached at the given hook
when the user runs the 'nft hook list' command, rather than just the
priority and a not-very-helpful 'this hook runs a bpf prog but I can't
tell which one'.

It will also be used to disallow registration of two bpf programs with
the same priority in a followup patch.

v4: arm32 cmpxchg only supports 32bit operand
    s/prio/priority/
v3: restrict prog attachment to ip/ip6 for now, let's lift restrictions if
    more use cases pop up (arptables, ebtables, netdev ingress/egress etc).

Signed-off-by: Florian Westphal <fw@strlen.de>
Link: https://lore.kernel.org/r/20230421170300.24115-2-fw@strlen.de
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/linux/netfilter.h
include/net/netfilter/nf_bpf_link.h [new file with mode: 0644]
include/uapi/linux/bpf.h
kernel/bpf/syscall.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/nf_bpf_link.c [new file with mode: 0644]

index c8e03bc..0762444 100644 (file)
@@ -80,6 +80,7 @@ typedef unsigned int nf_hookfn(void *priv,
 enum nf_hook_ops_type {
        NF_HOOK_OP_UNDEFINED,
        NF_HOOK_OP_NF_TABLES,
+       NF_HOOK_OP_BPF,
 };
 
 struct nf_hook_ops {
diff --git a/include/net/netfilter/nf_bpf_link.h b/include/net/netfilter/nf_bpf_link.h
new file mode 100644 (file)
index 0000000..eeaeaf3
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_BPF_LINK_H
+#define _NF_BPF_LINK_H
+
+#include <linux/errno.h>
+
+/* Only pointers to these types are used here; full definitions not needed. */
+union bpf_attr;
+struct bpf_prog;
+
+/*
+ * bpf_nf_link_attach - handle BPF_LINK_CREATE for BPF_PROG_TYPE_NETFILTER.
+ * @attr: link_create attributes; the netfilter member carries pf, hooknum,
+ *        priority and flags
+ * @prog: program to attach
+ *
+ * Returns the value produced by the real implementation, or -EOPNOTSUPP
+ * when CONFIG_NETFILTER_BPF_LINK is not enabled.
+ */
+#if IS_ENABLED(CONFIG_NETFILTER_BPF_LINK)
+int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+#else
+static inline int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
+#endif /* _NF_BPF_LINK_H */
index 4b20a72..1bb11a6 100644 (file)
@@ -986,6 +986,7 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_LSM,
        BPF_PROG_TYPE_SK_LOOKUP,
        BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+       BPF_PROG_TYPE_NETFILTER,
 };
 
 enum bpf_attach_type {
@@ -1050,6 +1051,7 @@ enum bpf_link_type {
        BPF_LINK_TYPE_PERF_EVENT = 7,
        BPF_LINK_TYPE_KPROBE_MULTI = 8,
        BPF_LINK_TYPE_STRUCT_OPS = 9,
+       BPF_LINK_TYPE_NETFILTER = 10,
 
        MAX_BPF_LINK_TYPE,
 };
@@ -1560,6 +1562,12 @@ union bpf_attr {
                                 */
                                __u64           cookie;
                        } tracing;
+                       struct {
+                               __u32           pf;
+                               __u32           hooknum;
+                               __s32           priority;
+                               __u32           flags;
+                       } netfilter;
                };
        } link_create;
 
@@ -6410,6 +6418,12 @@ struct bpf_link_info {
                struct {
                        __u32 map_id;
                } struct_ops;
+               struct {
+                       __u32 pf;
+                       __u32 hooknum;
+                       __s32 priority;
+                       __u32 flags;
+               } netfilter;
        };
 } __attribute__((aligned(8)));
 
index bcf1a19..14f39c1 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/rcupdate_trace.h>
 #include <linux/memcontrol.h>
 #include <linux/trace_events.h>
+#include <net/netfilter/nf_bpf_link.h>
 
 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
                          (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -2462,6 +2463,7 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
        case BPF_PROG_TYPE_CGROUP_SYSCTL:
        case BPF_PROG_TYPE_SOCK_OPS:
        case BPF_PROG_TYPE_EXT: /* extends any prog */
+       case BPF_PROG_TYPE_NETFILTER:
                return true;
        case BPF_PROG_TYPE_CGROUP_SKB:
                /* always unpriv */
@@ -4588,6 +4590,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 
        switch (prog->type) {
        case BPF_PROG_TYPE_EXT:
+       case BPF_PROG_TYPE_NETFILTER:
                break;
        case BPF_PROG_TYPE_PERF_EVENT:
        case BPF_PROG_TYPE_TRACEPOINT:
@@ -4654,6 +4657,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
        case BPF_PROG_TYPE_XDP:
                ret = bpf_xdp_link_attach(attr, prog);
                break;
+       case BPF_PROG_TYPE_NETFILTER:
+               ret = bpf_nf_link_attach(attr, prog);
+               break;
 #endif
        case BPF_PROG_TYPE_PERF_EVENT:
        case BPF_PROG_TYPE_TRACEPOINT:
index d0bf630..441d1f1 100644 (file)
@@ -30,6 +30,9 @@ config NETFILTER_FAMILY_BRIDGE
 config NETFILTER_FAMILY_ARP
        bool
 
+config NETFILTER_BPF_LINK
+       def_bool BPF_SYSCALL
+
 config NETFILTER_NETLINK_HOOK
        tristate "Netfilter base hook dump support"
        depends on NETFILTER_ADVANCED
index 5ffef1c..d4958e7 100644 (file)
@@ -22,6 +22,7 @@ nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
 endif
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
+obj-$(CONFIG_NETFILTER_BPF_LINK) += nf_bpf_link.o
 
 obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
 obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
new file mode 100644 (file)
index 0000000..efa4f33
--- /dev/null
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_bpf_link.h>
+#include <uapi/linux/netfilter_ipv4.h>
+
+/*
+ * Placeholder netfilter hook function installed for attached programs.
+ *
+ * None of the arguments are used: the program is not invoked and every
+ * packet receives the NF_ACCEPT verdict.
+ */
+static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
+                                   const struct nf_hook_state *s)
+{
+       const unsigned int verdict = NF_ACCEPT;
+
+       return verdict;
+}
+
+struct bpf_nf_link { /* per-link state tying a bpf_link to one netfilter hook registration */
+       struct bpf_link link; /* embedded generic link; callbacks recover this struct via container_of() */
+       struct nf_hook_ops hook_ops; /* hook descriptor passed to nf_register_net_hook()/nf_unregister_net_hook() */
+       struct net *net; /* namespace the hook was registered in (taken from current at attach time) */
+       u32 dead; /* flipped 0 -> 1 with cmpxchg() so the hook is unregistered only once */
+};
+
+/*
+ * Unregister the netfilter hook behind @link, at most once.
+ *
+ * Installed as the .release callback and also reached through
+ * bpf_nf_link_detach().  The 0 -> 1 transition of ->dead via cmpxchg()
+ * picks the single caller that performs the unregistration; this
+ * prevents the hook-not-found warning splat from the netfilter core
+ * when .detach was already called.
+ */
+static void bpf_nf_link_release(struct bpf_link *link)
+{
+       struct bpf_nf_link *nfl = container_of(link, struct bpf_nf_link, link);
+
+       /* Already marked dead, or somebody else just won the 0 -> 1 swap. */
+       if (nfl->dead || cmpxchg(&nfl->dead, 0, 1))
+               return;
+
+       nf_unregister_net_hook(nfl->net, &nfl->hook_ops);
+}
+
+/* .dealloc callback: free the bpf_nf_link object that embeds @link. */
+static void bpf_nf_link_dealloc(struct bpf_link *link)
+{
+       kfree(container_of(link, struct bpf_nf_link, link));
+}
+
+/*
+ * .detach callback: run the same teardown as bpf_nf_link_release().
+ *
+ * Always returns 0.
+ */
+static int bpf_nf_link_detach(struct bpf_link *link)
+{
+       bpf_nf_link_release(link);
+
+       return 0;
+}
+
+/*
+ * .show_fdinfo callback: print the protocol family, hook number and
+ * priority of the registered hook to @seq as one tab-separated line.
+ */
+static void bpf_nf_link_show_info(const struct bpf_link *link,
+                                 struct seq_file *seq)
+{
+       const struct nf_hook_ops *ops;
+
+       ops = &container_of(link, struct bpf_nf_link, link)->hook_ops;
+
+       seq_printf(seq, "pf:\t%u\thooknum:\t%u\tprio:\t%d\n",
+                  ops->pf, ops->hooknum, ops->priority);
+}
+
+/*
+ * .fill_link_info callback: copy pf, hooknum and priority of the
+ * registered hook into @info->netfilter; flags is reported as 0.
+ *
+ * Always returns 0.
+ */
+static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
+                                     struct bpf_link_info *info)
+{
+       const struct nf_hook_ops *ops;
+
+       ops = &container_of(link, struct bpf_nf_link, link)->hook_ops;
+
+       info->netfilter.pf = ops->pf;
+       info->netfilter.hooknum = ops->hooknum;
+       info->netfilter.priority = ops->priority;
+       info->netfilter.flags = 0;
+
+       return 0;
+}
+
+/*
+ * .update_prog callback: replacing the program of an existing link is
+ * not implemented; every request is rejected with -EOPNOTSUPP.
+ */
+static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
+                             struct bpf_prog *old_prog)
+{
+       const int unsupported = -EOPNOTSUPP;
+
+       return unsupported;
+}
+
+/* Link operations handed to bpf_link_init() for netfilter links. */
+static const struct bpf_link_ops bpf_nf_link_lops = {
+       /* teardown */
+       .detach         = bpf_nf_link_detach,
+       .release        = bpf_nf_link_release,
+       .dealloc        = bpf_nf_link_dealloc,
+       /* introspection */
+       .fill_link_info = bpf_nf_link_fill_link_info,
+       .show_fdinfo    = bpf_nf_link_show_info,
+       /* program replacement (rejected) */
+       .update_prog    = bpf_nf_link_update,
+};
+
+/*
+ * Validate the netfilter part of a link-create request.
+ *
+ * Checks are made in this order; the first failure decides the result:
+ *   -EAFNOSUPPORT  pf is neither NFPROTO_IPV4 nor NFPROTO_IPV6
+ *   -EPROTO        hooknum is not below NF_INET_NUMHOOKS
+ *   -EOPNOTSUPP    netfilter.flags is non-zero
+ *   -ERANGE        priority is NF_IP_PRI_FIRST or NF_IP_PRI_LAST
+ *
+ * Returns 0 when the request passes all checks.
+ */
+static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
+{
+       const __u32 pf = attr->link_create.netfilter.pf;
+       const __s32 prio = attr->link_create.netfilter.priority;
+
+       if (pf != NFPROTO_IPV4 && pf != NFPROTO_IPV6)
+               return -EAFNOSUPPORT;
+
+       if (attr->link_create.netfilter.hooknum >= NF_INET_NUMHOOKS)
+               return -EPROTO;
+
+       if (attr->link_create.netfilter.flags)
+               return -EOPNOTSUPP;
+
+       /* The two extreme priorities stay reserved so that conntrack
+        * confirm is always last (NF_IP_PRI_LAST) and sabotage_in and
+        * other warts keep NF_IP_PRI_FIRST.
+        *
+        * In the future, if userspace can e.g. request defrag, then
+        * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
+        * should fail.
+        */
+       if (prio == NF_IP_PRI_FIRST || prio == NF_IP_PRI_LAST)
+               return -ERANGE;
+
+       return 0;
+}
+
+/*
+ * bpf_nf_link_attach - create a netfilter bpf_link for @prog.
+ * @attr: link-create attributes; link_create.flags must be zero and the
+ *        netfilter member must pass bpf_nf_check_pf_and_hooks()
+ * @prog: program the new link refers to
+ *
+ * Allocates the link, fills a hook descriptor from @attr, primes the
+ * link and registers the hook in the caller's network namespace.
+ *
+ * Returns the result of bpf_link_settle() on success, -EINVAL for
+ * non-zero link flags, -ENOMEM on allocation failure, or the error
+ * from validation, bpf_link_prime() or nf_register_net_hook().
+ *
+ * NOTE(review): only the raw namespace pointer is stored; no reference
+ * on the namespace is taken in this function -- confirm that the link
+ * cannot outlive the namespace.
+ */
+int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+       struct net *net = current->nsproxy->net_ns;
+       struct bpf_link_primer primer;
+       struct bpf_nf_link *nf_link;
+       struct nf_hook_ops *ops;
+       int err;
+
+       if (attr->link_create.flags)
+               return -EINVAL;
+
+       err = bpf_nf_check_pf_and_hooks(attr);
+       if (err)
+               return err;
+
+       nf_link = kzalloc(sizeof(*nf_link), GFP_USER);
+       if (!nf_link)
+               return -ENOMEM;
+
+       bpf_link_init(&nf_link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog);
+
+       /* Where to hook: taken from the user request. */
+       ops = &nf_link->hook_ops;
+       ops->pf = attr->link_create.netfilter.pf;
+       ops->hooknum = attr->link_create.netfilter.hooknum;
+       ops->priority = attr->link_create.netfilter.priority;
+
+       /* What runs there: the stub, with the program as private data. */
+       ops->hook = nf_hook_run_bpf;
+       ops->hook_ops_type = NF_HOOK_OP_BPF;
+       ops->priv = prog;
+
+       nf_link->net = net;
+       nf_link->dead = false;
+
+       err = bpf_link_prime(&nf_link->link, &primer);
+       if (err) {
+               /* Not primed: the allocation is still ours to free. */
+               kfree(nf_link);
+               return err;
+       }
+
+       err = nf_register_net_hook(net, ops);
+       if (!err)
+               return bpf_link_settle(&primer);
+
+       /* Registration failed after priming: undo via bpf_link_cleanup(). */
+       bpf_link_cleanup(&primer);
+       return err;
+}