bpf: Implement bpf_prog replacement for an active bpf_cgroup_link
author Andrii Nakryiko <andriin@fb.com>
Mon, 30 Mar 2020 02:59:59 +0000 (19:59 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Tue, 31 Mar 2020 00:36:33 +0000 (17:36 -0700)
Add new operation (LINK_UPDATE), which allows replacing the active bpf_prog
under a given bpf_link. Currently this is only supported for bpf_cgroup_link,
but will be extended to other kinds of bpf_links in follow-up patches.

For bpf_cgroup_link, implemented functionality matches existing semantics for
direct bpf_prog attachment (including BPF_F_REPLACE flag). User can either
unconditionally set new bpf_prog regardless of which bpf_prog is currently
active under given bpf_link, or, optionally, can specify expected active
bpf_prog. If active bpf_prog doesn't match expected one, no changes are
performed, old bpf_link stays intact and attached, operation returns
a failure.

The cgroup_bpf_replace() operation resolves the race between auto-detachment
and bpf_prog update in the same fashion as is done for bpf_link detachment,
except that in this case the update has no way of succeeding because the target
cgroup is marked as dying. So in this case an error is returned.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200330030001.2312810-3-andriin@fb.com
include/linux/bpf-cgroup.h
include/uapi/linux/bpf.h
kernel/bpf/cgroup.c
kernel/bpf/syscall.c
kernel/cgroup/cgroup.c

index d2d9696..c11b413 100644 (file)
@@ -100,6 +100,8 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
                        struct bpf_cgroup_link *link,
                        enum bpf_attach_type type);
+int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
+                        struct bpf_prog *new_prog);
 int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
                       union bpf_attr __user *uattr);
 
@@ -110,6 +112,8 @@ int cgroup_bpf_attach(struct cgroup *cgrp,
                      u32 flags);
 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
                      enum bpf_attach_type type);
+int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
+                      struct bpf_prog *new_prog);
 int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
                     union bpf_attr __user *uattr);
 
@@ -350,6 +354,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
 #else
 
 struct bpf_prog;
+struct bpf_link;
 struct cgroup_bpf {};
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
@@ -373,6 +378,13 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
        return -EINVAL;
 }
 
+static inline int cgroup_bpf_replace(struct bpf_link *link,
+                                    struct bpf_prog *old_prog,
+                                    struct bpf_prog *new_prog)
+{
+       return -EINVAL; /* #else-branch stub: replacement always fails */
+}
+
 static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
                                        union bpf_attr __user *uattr)
 {
index 37dffe5..2e29a67 100644 (file)
@@ -112,6 +112,7 @@ enum bpf_cmd {
        BPF_MAP_UPDATE_BATCH,
        BPF_MAP_DELETE_BATCH,
        BPF_LINK_CREATE,
+       BPF_LINK_UPDATE,
 };
 
 enum bpf_map_type {
@@ -577,6 +578,17 @@ union bpf_attr {
                __u32           attach_type;    /* attach type */
                __u32           flags;          /* extra flags */
        } link_create;
+
+       struct { /* struct used by BPF_LINK_UPDATE command */
+               __u32           link_fd;        /* fd of the link to update */
+               /* fd of the new program to install into the link */
+               __u32           new_prog_fd;
+               __u32           flags;          /* extra flags (BPF_F_REPLACE) */
+               /* fd of the program expected to be active on the link; only
+                * looked at if BPF_F_REPLACE flag is set in flags, in which
+                * case a mismatch fails the update */
+               __u32           old_prog_fd;
+       } link_update;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
index c240299..80676fc 100644 (file)
@@ -500,6 +500,86 @@ cleanup:
        return err;
 }
 
+/* Swap updated BPF program for given link in effective program arrays across
+ * all descendant cgroups. This function is guaranteed to succeed.
+ *
+ * @cgrp: cgroup whose css tree is walked with css_for_each_descendant_pre(),
+ *        starting from &cgrp->self
+ * @type: attach type selecting which progs list and effective array are used
+ * @link: link whose current link->link.prog is stored into the matching slot
+ *
+ * Visited cgroups whose bpf.refcnt is already zero are skipped. For every
+ * other one the slot index is computed by counting list entries (see the
+ * comment in the loop) and the program pointer in that slot is overwritten
+ * with WRITE_ONCE(). BUG_ON() fires if the upward walk runs out of ancestors
+ * without meeting @link.
+ *
+ * The effective array is dereferenced under lockdep_is_held(&cgroup_mutex),
+ * so the caller is expected to hold cgroup_mutex.
+ * NOTE(review): WRITE_ONCE() presumably pairs with readers that access the
+ * array without cgroup_mutex -- confirm against the read side.
+ */
+static void replace_effective_prog(struct cgroup *cgrp,
+                                  enum bpf_attach_type type,
+                                  struct bpf_cgroup_link *link)
+{
+       struct bpf_prog_array_item *item;
+       struct cgroup_subsys_state *css;
+       struct bpf_prog_array *progs;
+       struct bpf_prog_list *pl;
+       struct list_head *head;
+       struct cgroup *cg;
+       int pos;
+
+       css_for_each_descendant_pre(css, &cgrp->self) {
+               struct cgroup *desc = container_of(css, struct cgroup, self);
+
+               if (percpu_ref_is_zero(&desc->bpf.refcnt))
+                       continue;
+
+               /* find position of link in effective progs array: walk from
+                * desc up through its ancestors and count every list entry
+                * for which prog_list_prog() is non-NULL, stopping at the
+                * entry that owns link. Once at least one entry has been
+                * counted, ancestors without BPF_F_ALLOW_MULTI for this
+                * type are skipped entirely.
+                * NOTE(review): this presumably mirrors the order in which
+                * the effective array is built -- confirm against the code
+                * that computes it.
+                */
+               for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
+                       if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+                               continue;
+
+                       head = &cg->bpf.progs[type];
+                       list_for_each_entry(pl, head, node) {
+                               if (!prog_list_prog(pl))
+                                       continue;
+                               if (pl->link == link)
+                                       goto found;
+                               pos++;
+                       }
+               }
+found:
+               BUG_ON(!cg);
+               progs = rcu_dereference_protected(
+                               desc->bpf.effective[type],
+                               lockdep_is_held(&cgroup_mutex));
+               item = &progs->items[pos];
+               WRITE_ONCE(item->prog, link->link.prog);
+       }
+}
+
+/**
+ * __cgroup_bpf_replace() - Swap the program behind a cgroup link and update
+ *                          the effective program arrays of descendants
+ * @cgrp: The cgroup @link is expected to be attached to
+ * @link: The link whose BPF program is being replaced
+ * @new_prog: The program to install in place of the link's current one
+ *
+ * Fails with -EINVAL if @new_prog's type differs from the type of the
+ * program currently held by @link, and with -ENOENT if @link is not found
+ * on @cgrp's program list for link->type. On success the previous program
+ * is released with bpf_prog_put() and 0 is returned.
+ *
+ * Must be called with cgroup_mutex held.
+ */
+int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
+                        struct bpf_prog *new_prog)
+{
+       struct list_head *head = &cgrp->bpf.progs[link->type];
+       struct bpf_prog_list *entry;
+       struct bpf_prog *prev_prog;
+       bool attached = false;
+
+       if (new_prog->type != link->link.prog->type)
+               return -EINVAL;
+
+       /* make sure the link really sits on this cgroup's list */
+       list_for_each_entry(entry, head, node) {
+               if (entry->link != link)
+                       continue;
+               attached = true;
+               break;
+       }
+       if (!attached)
+               return -ENOENT;
+
+       /* install the new program, propagate it, then drop the old one */
+       prev_prog = xchg(&link->link.prog, new_prog);
+       replace_effective_prog(cgrp, link->type, link);
+       bpf_prog_put(prev_prog);
+       return 0;
+}
+
 static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
                                               struct bpf_prog *prog,
                                               struct bpf_cgroup_link *link,
index 97d5c6f..e0a3b34 100644 (file)
@@ -3596,6 +3596,58 @@ err_out:
        return ret;
 }
 
+#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
+
+static int link_update(union bpf_attr *attr)
+{
+       struct bpf_prog *old_prog = NULL, *new_prog;
+       struct bpf_link *link;
+       u32 flags;
+       int ret;
+
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
+       if (CHECK_ATTR(BPF_LINK_UPDATE))
+               return -EINVAL;
+
+       flags = attr->link_update.flags;
+       if (flags & ~BPF_F_REPLACE)
+               return -EINVAL;
+
+       link = bpf_link_get_from_fd(attr->link_update.link_fd);
+       if (IS_ERR(link))
+               return PTR_ERR(link);
+
+       new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
+       if (IS_ERR(new_prog))
+               return PTR_ERR(new_prog);
+
+       if (flags & BPF_F_REPLACE) {
+               old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
+               if (IS_ERR(old_prog)) {
+                       ret = PTR_ERR(old_prog);
+                       old_prog = NULL;
+                       goto out_put_progs;
+               }
+       }
+
+#ifdef CONFIG_CGROUP_BPF
+       if (link->ops == &bpf_cgroup_link_lops) {
+               ret = cgroup_bpf_replace(link, old_prog, new_prog);
+               goto out_put_progs;
+       }
+#endif
+       ret = -EINVAL;
+
+out_put_progs:
+       if (old_prog)
+               bpf_prog_put(old_prog);
+       if (ret)
+               bpf_prog_put(new_prog);
+       return ret;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
        union bpf_attr attr = {};
@@ -3709,6 +3761,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_LINK_CREATE:
                err = link_create(&attr);
                break;
+       case BPF_LINK_UPDATE:
+               err = link_update(&attr);
+               break;
        default:
                err = -EINVAL;
                break;
index 219624f..915dda3 100644 (file)
@@ -6317,6 +6317,33 @@ int cgroup_bpf_attach(struct cgroup *cgrp,
        return ret;
 }
 
+/* Replace the program of a cgroup link with new_prog while holding
+ * cgroup_mutex.
+ *
+ * Returns -EINVAL if link is not a cgroup link or no longer has a cgroup,
+ * -EPERM if old_prog is given and is not the link's current program, and
+ * otherwise whatever __cgroup_bpf_replace() returns.
+ */
+int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
+                      struct bpf_prog *new_prog)
+{
+       struct bpf_cgroup_link *cgrp_link;
+       int err;
+
+       if (link->ops != &bpf_cgroup_link_lops)
+               return -EINVAL;
+
+       cgrp_link = container_of(link, struct bpf_cgroup_link, link);
+
+       mutex_lock(&cgroup_mutex);
+       if (!cgrp_link->cgroup) {
+               /* link might have been auto-released by dying cgroup, so fail */
+               err = -EINVAL;
+       } else if (old_prog && old_prog != link->prog) {
+               /* the caller's expected program is not the active one */
+               err = -EPERM;
+       } else {
+               err = __cgroup_bpf_replace(cgrp_link->cgroup, cgrp_link,
+                                          new_prog);
+       }
+       mutex_unlock(&cgroup_mutex);
+       return err;
+}
+
 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
                      enum bpf_attach_type type)
 {