bpf: Implement bpf_prog replacement for an active bpf_cgroup_link

author Andrii Nakryiko <andriin@fb.com>

Mon, 30 Mar 2020 02:59:59 +0000 (19:59 -0700)

committer Alexei Starovoitov <ast@kernel.org>

Tue, 31 Mar 2020 00:36:33 +0000 (17:36 -0700)
author Andrii Nakryiko <andriin@fb.com>
Mon, 30 Mar 2020 02:59:59 +0000 (19:59 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Tue, 31 Mar 2020 00:36:33 +0000 (17:36 -0700)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h

index d2d9696695640fe4d5477bc911dea811a93079a0..c11b413d5b1a60fadf7e6fcca71aeb94e98c3629 100644 (file)
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -100,6 +100,8 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
  int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
                         struct bpf_cgroup_link *link,
                         enum bpf_attach_type type);
+int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
+                        struct bpf_prog *new_prog);
  int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
                        union bpf_attr __user *uattr);
  
@@ -110,6 +112,8 @@ int cgroup_bpf_attach(struct cgroup *cgrp,
                       u32 flags);
  int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
                       enum bpf_attach_type type);
+int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
+                      struct bpf_prog *new_prog);
  int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
                      union bpf_attr __user *uattr);
  
@@ -350,6 +354,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
  #else
  
  struct bpf_prog;
+struct bpf_link;
  struct cgroup_bpf {};
  static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
  static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
@@ -373,6 +378,13 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
         return -EINVAL;
  }
  
+static inline int cgroup_bpf_replace(struct bpf_link *link,
+                                    struct bpf_prog *old_prog,
+                                    struct bpf_prog *new_prog)
+{
+       return -EINVAL; /* inline stub: unconditionally fails */
+}
+
  static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
                                         union bpf_attr __user *uattr)
  {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 37dffe5089a09c26a4b3757e0091069d3e752234..2e29a671d67eb96a3139c893158aa0fc69466213 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -112,6 +112,7 @@ enum bpf_cmd {
         BPF_MAP_UPDATE_BATCH,
         BPF_MAP_DELETE_BATCH,
         BPF_LINK_CREATE,
+       BPF_LINK_UPDATE,
  };
  
  enum bpf_map_type {
@@ -577,6 +578,17 @@ union bpf_attr {
                 __u32           attach_type;    /* attach type */
                 __u32           flags;          /* extra flags */
         } link_create;
+
+       struct { /* struct used by BPF_LINK_UPDATE command */
+               __u32           link_fd;        /* link fd */
+               /* new program fd to update link with */
+               __u32           new_prog_fd;
+               __u32           flags;          /* extra flags */
+               /* expected link's program fd; is specified only if
+                * BPF_F_REPLACE flag is set in flags */
+               __u32           old_prog_fd;
+       } link_update;
+
  } __attribute__((aligned(8)));
  
  /* The description below is an attempt at providing documentation to eBPF
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c

index c240299374319958080a2ee235a88eef99270bda..80676fc00d81984c48ad3bf0c72ec3e5201a8e95 100644 (file)
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -500,6 +500,86 @@ cleanup:
         return err;
  }
  
+/* Store link->link.prog into @link's slot of effective[@type] for every
+ * descendant of @cgrp with a non-zero bpf refcnt; a missing @link is a BUG.
+ */
+static void replace_effective_prog(struct cgroup *cgrp,
+                                  enum bpf_attach_type type,
+                                  struct bpf_cgroup_link *link)
+{
+       struct bpf_prog_array_item *item;
+       struct cgroup_subsys_state *css;
+       struct bpf_prog_array *progs;
+       struct bpf_prog_list *pl;
+       struct list_head *head;
+       struct cgroup *cg;
+       int pos;
+
+       css_for_each_descendant_pre(css, &cgrp->self) {
+               struct cgroup *desc = container_of(css, struct cgroup, self);
+
+               if (percpu_ref_is_zero(&desc->bpf.refcnt))
+                       continue;
+
+               /* pos: entries with a program passed before reaching link */
+               for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
+                       if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+                               continue; /* pos > 0, no ALLOW_MULTI here */
+
+                       head = &cg->bpf.progs[type];
+                       list_for_each_entry(pl, head, node) {
+                               if (!prog_list_prog(pl))
+                                       continue;
+                               if (pl->link == link)
+                                       goto found;
+                               pos++;
+                       }
+               }
+found:
+               BUG_ON(!cg); /* walk ended without hitting link */
+               progs = rcu_dereference_protected(
+                               desc->bpf.effective[type],
+                               lockdep_is_held(&cgroup_mutex));
+               item = &progs->items[pos];
+               WRITE_ONCE(item->prog, link->link.prog);
+       }
+}
+
+/**
+ * __cgroup_bpf_replace() - Replace link's program and propagate the change
+ *                          to descendants
+ * @cgrp: The cgroup whose descendants to traverse
+ * @link: A link for which to replace BPF program
+ * @new_prog: Program to install; its type must match the link's current one
+ *
+ * Must be called with cgroup_mutex held. Returns 0, -EINVAL or -ENOENT.
+ */
+int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
+                        struct bpf_prog *new_prog)
+{
+       struct list_head *progs = &cgrp->bpf.progs[link->type];
+       struct bpf_prog *old_prog;
+       struct bpf_prog_list *pl;
+       bool found = false;
+
+       if (link->link.prog->type != new_prog->type)
+               return -EINVAL;
+
+       list_for_each_entry(pl, progs, node) {
+               if (pl->link == link) {
+                       found = true;
+                       break;
+               }
+       }
+       if (!found)
+               return -ENOENT;
+
+       old_prog = xchg(&link->link.prog, new_prog);
+       replace_effective_prog(cgrp, link->type, link);
+       bpf_prog_put(old_prog);
+       return 0;
+}
+
  static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
                                                struct bpf_prog *prog,
                                                struct bpf_cgroup_link *link,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index 97d5c6fb63cda8bfd58cad262c096b7ae9a5eb5c..e0a3b34d70396a81ca089aed5d7393176e0a057f 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3596,6 +3596,73 @@ err_out:
         return ret;
  }
  
+#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd
+
+/* Handle BPF_LINK_UPDATE: swap the program attached through link_fd for the
+ * program behind new_prog_fd.
+ *
+ * Requires CAP_NET_ADMIN. BPF_F_REPLACE is the only flag accepted; when set,
+ * old_prog_fd names the program the link is expected to hold at the moment.
+ * Only cgroup links (under CONFIG_CGROUP_BPF) are handled; else -EINVAL.
+ *
+ * The link reference taken from link_fd is dropped on every exit path. The
+ * new_prog reference is dropped only on failure; on success it stays with
+ * the link. Returns 0 or a negative errno.
+ */
+static int link_update(union bpf_attr *attr)
+{
+       struct bpf_prog *old_prog = NULL, *new_prog;
+       struct bpf_link *link;
+       u32 flags;
+       int ret;
+
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
+       if (CHECK_ATTR(BPF_LINK_UPDATE))
+               return -EINVAL;
+
+       flags = attr->link_update.flags;
+       if (flags & ~BPF_F_REPLACE)
+               return -EINVAL;
+
+       link = bpf_link_get_from_fd(attr->link_update.link_fd);
+       if (IS_ERR(link))
+               return PTR_ERR(link);
+
+       new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
+       if (IS_ERR(new_prog)) {
+               ret = PTR_ERR(new_prog);
+               goto out_put_link;
+       }
+
+       if (flags & BPF_F_REPLACE) {
+               old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
+               if (IS_ERR(old_prog)) {
+                       ret = PTR_ERR(old_prog);
+                       old_prog = NULL;
+                       goto out_put_progs;
+               }
+       }
+
+#ifdef CONFIG_CGROUP_BPF
+       if (link->ops == &bpf_cgroup_link_lops) {
+               ret = cgroup_bpf_replace(link, old_prog, new_prog);
+               goto out_put_progs;
+       }
+#endif
+       ret = -EINVAL;
+
+out_put_progs:
+       if (old_prog)
+               bpf_prog_put(old_prog);
+       if (ret)
+               bpf_prog_put(new_prog);
+out_put_link:
+       bpf_link_put(link);
+       return ret;
+}
+
  SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
  {
         union bpf_attr attr = {};
@@ -3709,6 +3761,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
         case BPF_LINK_CREATE:
                 err = link_create(&attr);
                 break;
+       case BPF_LINK_UPDATE:
+               err = link_update(&attr);
+               break;
         default:
                 err = -EINVAL;
                 break;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c

index 219624fba9bafe555f499b40ab1150889ba6fc59..915dda3f7f1949bcd09c1b5b1cadd9be7596f6b3 100644 (file)
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6317,6 +6317,33 @@ int cgroup_bpf_attach(struct cgroup *cgrp,
         return ret;
  }
  
+/* Replace a cgroup link's program with @new_prog under cgroup_mutex.
+ * -EINVAL: @link is not a cgroup link or no longer points at a cgroup.
+ * -EPERM: @old_prog was given but is not the link's current program.
+ */
+int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
+                      struct bpf_prog *new_prog)
+{
+       struct bpf_cgroup_link *cglink;
+       int ret;
+
+       if (link->ops != &bpf_cgroup_link_lops)
+               return -EINVAL;
+
+       cglink = container_of(link, struct bpf_cgroup_link, link);
+
+       mutex_lock(&cgroup_mutex);
+       /* link might have been auto-released by dying cgroup, so fail */
+       if (!cglink->cgroup)
+               ret = -EINVAL;
+       else if (old_prog && link->prog != old_prog)
+               ret = -EPERM;
+       else
+               ret = __cgroup_bpf_replace(cglink->cgroup, cglink, new_prog);
+       mutex_unlock(&cgroup_mutex);
+       return ret;
+}
+
  int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
                       enum bpf_attach_type type)
  {
author	Andrii Nakryiko <andriin@fb.com>
	Mon, 30 Mar 2020 02:59:59 +0000 (19:59 -0700)
committer	Alexei Starovoitov <ast@kernel.org>
	Tue, 31 Mar 2020 00:36:33 +0000 (17:36 -0700)
include/linux/bpf-cgroup.h		patch \| blob \| history
include/uapi/linux/bpf.h		patch \| blob \| history
kernel/bpf/cgroup.c		patch \| blob \| history
kernel/bpf/syscall.c		patch \| blob \| history
kernel/cgroup/cgroup.c		patch \| blob \| history