bpf: Add support for writing to nf_conn:mark
author Daniel Xu <dxu@dxuuu.xyz>
Wed, 7 Sep 2022 16:40:40 +0000 (10:40 -0600)
committer Alexei Starovoitov <ast@kernel.org>
Sun, 11 Sep 2022 00:27:32 +0000 (17:27 -0700)
Support direct writes to nf_conn:mark from TC and XDP prog types. This
is useful when applications want to store per-connection metadata. This
is also particularly useful for applications that run both bpf and
iptables/nftables because the latter can trivially access this metadata.

One example use case would be if a bpf prog is responsible for advanced
packet classification and iptables/nftables is later used for routing
due to pre-existing/legacy code.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Link: https://lore.kernel.org/r/ebca06dea366e3e7e861c12f375a548cc4c61108.1662568410.git.dxu@dxuuu.xyz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/net/netfilter/nf_conntrack_bpf.h
net/core/filter.c
net/netfilter/nf_conntrack_bpf.c
net/netfilter/nf_conntrack_core.c

index a473b56..a61a93d 100644 (file)
@@ -3,13 +3,22 @@
 #ifndef _NF_CONNTRACK_BPF_H
 #define _NF_CONNTRACK_BPF_H
 
+#include <linux/bpf.h>
 #include <linux/btf.h>
 #include <linux/kconfig.h>
+#include <linux/mutex.h>
 
 #if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
     (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
 
 extern int register_nf_conntrack_bpf(void);
+/* Clears the nfct_bsa hook while holding nf_conn_btf_access_lock. */
+extern void cleanup_nf_conntrack_bpf(void);
+
+/* Held whenever nfct_bsa is installed, cleared, or called through. */
+extern struct mutex nf_conn_btf_access_lock;
+/* Optional handler for non-read BTF struct accesses made by TC and XDP
+ * programs. While it is NULL those accesses are refused with -EACCES.
+ */
+extern int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf,
+                      const struct btf_type *t, int off, int size,
+                      enum bpf_access_type atype, u32 *next_btf_id,
+                      enum bpf_type_flag *flag);
 
 #else
 
@@ -18,6 +27,20 @@ static inline int register_nf_conntrack_bpf(void)
        return 0;
 }
 
+/* No-op stand-in for configurations where the #if condition above is false. */
+static inline void cleanup_nf_conntrack_bpf(void)
+{
+}
+
+/* Stub for configurations where the #if condition above is false: refuses
+ * every access with -EACCES.
+ * NOTE(review): nothing in this patch calls a function by this name (the
+ * enabled branch exposes the nfct_bsa pointer instead) - confirm it is needed.
+ */
+static inline int nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
+                                                const struct btf *btf,
+                                                const struct btf_type *t, int off,
+                                                int size, enum bpf_access_type atype,
+                                                u32 *next_btf_id,
+                                                enum bpf_type_flag *flag)
+{
+       return -EACCES;
+}
+
 #endif
 
 #endif /* _NF_CONNTRACK_BPF_H */
index e872f45..4b2be21 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 #include <linux/atomic.h>
+#include <linux/bpf_verifier.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/mm.h>
@@ -8604,6 +8605,36 @@ static bool tc_cls_act_is_valid_access(int off, int size,
        return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+/* Serialises every update of, and call through, nfct_bsa. */
+DEFINE_MUTEX(nf_conn_btf_access_lock);
+EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock);
+
+/* Hook deciding whether a non-read BTF struct access is allowed. It starts
+ * out NULL; register_nf_conntrack_bpf() installs a handler and
+ * cleanup_nf_conntrack_bpf() resets it to NULL, both under the lock above.
+ */
+int (*nfct_bsa)(struct bpf_verifier_log *log, const struct btf *btf,
+               const struct btf_type *t, int off, int size,
+               enum bpf_access_type atype, u32 *next_btf_id,
+               enum bpf_type_flag *flag);
+EXPORT_SYMBOL_GPL(nfct_bsa);
+
+/* btf_struct_access callback wired into tc_cls_act_verifier_ops.
+ *
+ * Reads are delegated to the generic btf_struct_access() check. Any other
+ * access is forwarded to the nfct_bsa hook when one is installed; without a
+ * hook the access is refused with -EACCES. The hook is sampled and invoked
+ * with nf_conn_btf_access_lock held.
+ */
+static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
+                                       const struct btf *btf,
+                                       const struct btf_type *t, int off,
+                                       int size, enum bpf_access_type atype,
+                                       u32 *next_btf_id,
+                                       enum bpf_type_flag *flag)
+{
+       int err;
+
+       if (atype != BPF_READ) {
+               mutex_lock(&nf_conn_btf_access_lock);
+               err = nfct_bsa ? nfct_bsa(log, btf, t, off, size, atype,
+                                         next_btf_id, flag) :
+                                -EACCES;
+               mutex_unlock(&nf_conn_btf_access_lock);
+               return err;
+       }
+
+       return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+                                flag);
+}
+
 static bool __is_valid_xdp_access(int off, int size)
 {
        if (off < 0 || off >= sizeof(struct xdp_md))
@@ -8663,6 +8694,27 @@ void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog,
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
+/* btf_struct_access callback wired into xdp_verifier_ops.
+ *
+ * Reads are delegated to the generic btf_struct_access() check. Any other
+ * access is forwarded to the nfct_bsa hook when one is installed; without a
+ * hook the access is refused with -EACCES. The hook is sampled and invoked
+ * with nf_conn_btf_access_lock held.
+ */
+static int xdp_btf_struct_access(struct bpf_verifier_log *log,
+                                const struct btf *btf,
+                                const struct btf_type *t, int off,
+                                int size, enum bpf_access_type atype,
+                                u32 *next_btf_id,
+                                enum bpf_type_flag *flag)
+{
+       int err;
+
+       if (atype != BPF_READ) {
+               mutex_lock(&nf_conn_btf_access_lock);
+               err = nfct_bsa ? nfct_bsa(log, btf, t, off, size, atype,
+                                         next_btf_id, flag) :
+                                -EACCES;
+               mutex_unlock(&nf_conn_btf_access_lock);
+               return err;
+       }
+
+       return btf_struct_access(log, btf, t, off, size, atype, next_btf_id,
+                                flag);
+}
+
 static bool sock_addr_is_valid_access(int off, int size,
                                      enum bpf_access_type type,
                                      const struct bpf_prog *prog,
@@ -10557,6 +10609,7 @@ const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
        .convert_ctx_access     = tc_cls_act_convert_ctx_access,
        .gen_prologue           = tc_cls_act_prologue,
        .gen_ld_abs             = bpf_gen_ld_abs,
+       .btf_struct_access      = tc_cls_act_btf_struct_access,
 };
 
 const struct bpf_prog_ops tc_cls_act_prog_ops = {
@@ -10568,6 +10621,7 @@ const struct bpf_verifier_ops xdp_verifier_ops = {
        .is_valid_access        = xdp_is_valid_access,
        .convert_ctx_access     = xdp_convert_ctx_access,
        .gen_prologue           = bpf_noop_prologue,
+       .btf_struct_access      = xdp_btf_struct_access,
 };
 
 const struct bpf_prog_ops xdp_prog_ops = {
index 1cd87b2..77eb8e9 100644 (file)
@@ -6,8 +6,10 @@
  * are exposed through to BPF programs is explicitly unstable.
  */
 
+#include <linux/bpf_verifier.h>
 #include <linux/bpf.h>
 #include <linux/btf.h>
+#include <linux/mutex.h>
 #include <linux/types.h>
 #include <linux/btf_ids.h>
 #include <linux/net_namespace.h>
@@ -184,6 +186,54 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
        return ct;
 }
 
+/* BTF ids of the two conntrack struct flavours that accept writes:
+ * btf_nf_conn_ids[0] is `struct nf_conn`, [1] is `struct nf_conn___init`.
+ */
+BTF_ID_LIST(btf_nf_conn_ids)
+BTF_ID(struct, nf_conn)
+BTF_ID(struct, nf_conn___init)
+
+/* Decide whether a BPF program may write @size bytes at offset @off of @t.
+ *
+ * Only `struct nf_conn` and `struct nf_conn___init` are writable, and only
+ * within the `mark` member (available when CONFIG_NF_CONNTRACK_MARK is set).
+ * Returns 0 when the write is allowed and -EACCES, after logging the reason
+ * to @log, otherwise.
+ */
+static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
+                                          const struct btf *btf,
+                                          const struct btf_type *t, int off,
+                                          int size, enum bpf_access_type atype,
+                                          u32 *next_btf_id,
+                                          enum bpf_type_flag *flag)
+{
+       const struct btf_type *conn_type, *conn_init_type;
+       size_t member_end;
+
+       conn_type = btf_type_by_id(btf, btf_nf_conn_ids[0]);
+       conn_init_type = btf_type_by_id(btf, btf_nf_conn_ids[1]);
+
+       if (!(t == conn_type || t == conn_init_type)) {
+               bpf_log(log, "only read is supported\n");
+               return -EACCES;
+       }
+
+       /* Both structs share one layout, so offsets taken from
+        * `struct nf_conn` are valid for `struct nf_conn___init` too.
+        */
+       switch (off) {
+#ifdef CONFIG_NF_CONNTRACK_MARK
+       case offsetof(struct nf_conn, mark):
+               member_end = offsetofend(struct nf_conn, mark);
+               break;
+#endif
+       default:
+               bpf_log(log, "no write support to nf_conn at off %d\n", off);
+               return -EACCES;
+       }
+
+       /* The write must not run past the end of the member it starts in. */
+       if (member_end < off + size) {
+               bpf_log(log,
+                       "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
+                       off, size, member_end);
+               return -EACCES;
+       }
+
+       return 0;
+}
+
 __diag_push();
 __diag_ignore_all("-Wmissing-prototypes",
                  "Global functions as their definitions will be in nf_conntrack BTF");
@@ -449,5 +499,19 @@ int register_nf_conntrack_bpf(void)
        int ret;
 
        ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
-       return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
+       ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
+       if (!ret) {
+               mutex_lock(&nf_conn_btf_access_lock);
+               nfct_bsa = _nf_conntrack_btf_struct_access;
+               mutex_unlock(&nf_conn_btf_access_lock);
+       }
+
+       return ret;
+}
+
+/* Uninstall the nf_conn write-access hook. The pointer is reset under
+ * nf_conn_btf_access_lock, the same lock the TC/XDP callbacks hold while
+ * calling through it; once it is NULL they refuse non-read accesses.
+ */
+void cleanup_nf_conntrack_bpf(void)
+{
+       mutex_lock(&nf_conn_btf_access_lock);
+       nfct_bsa = NULL;
+       mutex_unlock(&nf_conn_btf_access_lock);
 }
index da65c6e..0195f60 100644 (file)
@@ -2512,6 +2512,7 @@ static int kill_all(struct nf_conn *i, void *data)
 
 void nf_conntrack_cleanup_start(void)
 {
+       /* Clear the BPF nf_conn write hook before marking conntrack_gc_work
+        * as exiting.
+        */
+       cleanup_nf_conntrack_bpf();
        conntrack_gc_work.exiting = true;
 }