tcx: Fix splat during dev unregister
author: Martin KaFai Lau <martin.lau@kernel.org>
Fri, 28 Jul 2023 21:47:17 +0000 (23:47 +0200)
committer: Jakub Kicinski <kuba@kernel.org>
Mon, 31 Jul 2023 21:44:02 +0000 (14:44 -0700)
During unregister_netdevice_many_notify(), the ordering of our concerned
function calls is like this:

  unregister_netdevice_many_notify
    dev_shutdown
        qdisc_put
            clsact_destroy
    tcx_uninstall

The syzbot reproducer triggered a case where the qdisc refcnt is still
non-zero during dev_shutdown(), so clsact_destroy() has not run yet.

tcx_uninstall() will then hit WARN_ON_ONCE(tcx_entry(entry)->miniq_active)
because the miniq is still active and the entry should not be freed yet.
tcx_uninstall() assumed that qdisc destruction happens before tcx teardown.

The fix is to have tcx_uninstall() skip tcx_entry_free() when the miniq
is still active and let clsact_destroy() do the free later, so that
neither of them assumes any specific ordering.

If the miniq is still active, tcx_uninstall() clears the entry when
flushing out the prog/link. clsact_destroy() will then see
"!tcx_entry_is_active()" and eventually do the tcx_entry_free().

Fixes: e420bed02507 ("bpf: Add fd-based tcx multi-prog infra with link support")
Reported-by: syzbot+376a289e86a0fd02b9ba@syzkaller.appspotmail.com
Reported-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: syzbot+376a289e86a0fd02b9ba@syzkaller.appspotmail.com
Tested-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/222255fe07cb58f15ee662e7ee78328af5b438e4.1690549248.git.daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/linux/bpf_mprog.h
kernel/bpf/tcx.c

index 2b429488f84023fd4529bd84df48805cb4b4e609..929225f7b09594803a9480866dffa85d7efd86b1 100644 (file)
@@ -256,6 +256,22 @@ static inline void bpf_mprog_entry_copy(struct bpf_mprog_entry *dst,
        memcpy(dst->fp_items, src->fp_items, sizeof(src->fp_items));
 }
 
+static inline void bpf_mprog_entry_clear(struct bpf_mprog_entry *dst)
+{
+       memset(dst->fp_items, 0, sizeof(dst->fp_items));
+}
+
+static inline void bpf_mprog_clear_all(struct bpf_mprog_entry *entry,
+                                      struct bpf_mprog_entry **entry_new)
+{
+       struct bpf_mprog_entry *peer;
+
+       peer = bpf_mprog_peer(entry);
+       bpf_mprog_entry_clear(peer);
+       peer->parent->count = 0;
+       *entry_new = peer;
+}
+
 static inline void bpf_mprog_entry_grow(struct bpf_mprog_entry *entry, int idx)
 {
        int total = bpf_mprog_total(entry);
index 69a272712b29f3e9f27f42c580928f1b70d5910e..13f0b5dc8262a0086487c95041d8c90ee0cffff8 100644 (file)
@@ -94,15 +94,19 @@ out:
 
 void tcx_uninstall(struct net_device *dev, bool ingress)
 {
+       struct bpf_mprog_entry *entry, *entry_new = NULL;
        struct bpf_tuple tuple = {};
-       struct bpf_mprog_entry *entry;
        struct bpf_mprog_fp *fp;
        struct bpf_mprog_cp *cp;
+       bool active;
 
        entry = tcx_entry_fetch(dev, ingress);
        if (!entry)
                return;
-       tcx_entry_update(dev, NULL, ingress);
+       active = tcx_entry(entry)->miniq_active;
+       if (active)
+               bpf_mprog_clear_all(entry, &entry_new);
+       tcx_entry_update(dev, entry_new, ingress);
        tcx_entry_sync();
        bpf_mprog_foreach_tuple(entry, fp, cp, tuple) {
                if (tuple.link)
@@ -111,8 +115,8 @@ void tcx_uninstall(struct net_device *dev, bool ingress)
                        bpf_prog_put(tuple.prog);
                tcx_skeys_dec(ingress);
        }
-       WARN_ON_ONCE(tcx_entry(entry)->miniq_active);
-       tcx_entry_free(entry);
+       if (!active)
+               tcx_entry_free(entry);
 }
 
 int tcx_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)