bpf: Allocate ID for bpf_link

author Andrii Nakryiko <andriin@fb.com>

Wed, 29 Apr 2020 00:16:06 +0000 (17:16 -0700)

committer Alexei Starovoitov <ast@kernel.org>

Wed, 29 Apr 2020 00:27:08 +0000 (17:27 -0700)
author Andrii Nakryiko <andriin@fb.com>
Wed, 29 Apr 2020 00:16:06 +0000 (17:16 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Wed, 29 Apr 2020 00:27:08 +0000 (17:27 -0700)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index 81c8620cb4c4593ccdf942dadd10599eac46dc2d..875d1f0af803987483d766253134a0359d1d2c1f 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1085,11 +1085,19 @@ int bpf_prog_new_fd(struct bpf_prog *prog);
  
  struct bpf_link {
         atomic64_t refcnt;
+       u32 id;
         const struct bpf_link_ops *ops;
         struct bpf_prog *prog;
         struct work_struct work;
  };
  
+struct bpf_link_primer {
+       struct bpf_link *link;
+       struct file *file;
+       int fd;
+       u32 id;
+};
+
  struct bpf_link_ops {
         void (*release)(struct bpf_link *link);
         void (*dealloc)(struct bpf_link *link);
@@ -1097,10 +1105,11 @@ struct bpf_link_ops {
                            struct bpf_prog *old_prog);
  };
  
-void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
-                  struct bpf_prog *prog);
-void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
-                     int link_fd);
+void bpf_link_init(struct bpf_link *link,
+                  const struct bpf_link_ops *ops, struct bpf_prog *prog);
+int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
+int bpf_link_settle(struct bpf_link_primer *primer);
+void bpf_link_cleanup(struct bpf_link_primer *primer);
  void bpf_link_inc(struct bpf_link *link);
  void bpf_link_put(struct bpf_link *link);
  int bpf_link_new_fd(struct bpf_link *link);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 4a6c47f3febea1dddcfda17c8e3ee868e424c5cc..6121aa4874654a0c889501ad203d6d3e7bfbf60f 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -523,6 +523,7 @@ union bpf_attr {
                         __u32           prog_id;
                         __u32           map_id;
                         __u32           btf_id;
+                       __u32           link_id;
                 };
                 __u32           next_id;
                 __u32           open_flags;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c

index da6e48e802b2c65882d6eb54df73dc5abf1a611a..1bdf37fca8791a5e2491efe7c489a899881faf04 100644 (file)
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -841,10 +841,10 @@ const struct bpf_link_ops bpf_cgroup_link_lops = {
  
  int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
  {
+       struct bpf_link_primer link_primer;
         struct bpf_cgroup_link *link;
-       struct file *link_file;
         struct cgroup *cgrp;
-       int err, link_fd;
+       int err;
  
         if (attr->link_create.flags)
                 return -EINVAL;
@@ -862,22 +862,20 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
         link->cgroup = cgrp;
         link->type = attr->link_create.attach_type;
  
-       link_file = bpf_link_new_file(&link->link, &link_fd);
-       if (IS_ERR(link_file)) {
+       err  = bpf_link_prime(&link->link, &link_primer);
+       if (err) {
                 kfree(link);
-               err = PTR_ERR(link_file);
                 goto out_put_cgroup;
         }
  
         err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
                                 BPF_F_ALLOW_MULTI);
         if (err) {
-               bpf_link_cleanup(&link->link, link_file, link_fd);
+               bpf_link_cleanup(&link_primer);
                 goto out_put_cgroup;
         }
  
-       fd_install(link_fd, link_file);
-       return link_fd;
+       return bpf_link_settle(&link_primer);
  
  out_put_cgroup:
         cgroup_put(cgrp);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index f5358e1462eb6ba5d2adf345855c20effdd773a3..5439e05e3d258eeb092b7068bf00601bc62ab074 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -42,6 +42,8 @@ static DEFINE_IDR(prog_idr);
  static DEFINE_SPINLOCK(prog_idr_lock);
  static DEFINE_IDR(map_idr);
  static DEFINE_SPINLOCK(map_idr_lock);
+static DEFINE_IDR(link_idr);
+static DEFINE_SPINLOCK(link_idr_lock);
  
  int sysctl_unprivileged_bpf_disabled __read_mostly;
  
@@ -2181,25 +2183,38 @@ static int bpf_obj_get(const union bpf_attr *attr)
                                 attr->file_flags);
  }
  
-void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
-                  struct bpf_prog *prog)
+void bpf_link_init(struct bpf_link *link,
+                  const struct bpf_link_ops *ops, struct bpf_prog *prog)
  {
         atomic64_set(&link->refcnt, 1);
+       link->id = 0;
         link->ops = ops;
         link->prog = prog;
  }
  
+static void bpf_link_free_id(int id)
+{
+       if (!id)
+               return;
+
+       spin_lock_bh(&link_idr_lock);
+       idr_remove(&link_idr, id);
+       spin_unlock_bh(&link_idr_lock);
+}
+
  /* Clean up bpf_link and corresponding anon_inode file and FD. After
   * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
- * anon_inode's release() call. This helper manages marking bpf_link as
- * defunct, releases anon_inode file and puts reserved FD.
+ * anon_inode's release() call. This helper marksbpf_link as
+ * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
+ * is not decremented, it's the responsibility of a calling code that failed
+ * to complete bpf_link initialization.
   */
-void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
-                     int link_fd)
+void bpf_link_cleanup(struct bpf_link_primer *primer)
  {
-       link->prog = NULL;
-       fput(link_file);
-       put_unused_fd(link_fd);
+       primer->link->prog = NULL;
+       bpf_link_free_id(primer->id);
+       fput(primer->file);
+       put_unused_fd(primer->fd);
  }
  
  void bpf_link_inc(struct bpf_link *link)
@@ -2210,6 +2225,7 @@ void bpf_link_inc(struct bpf_link *link)
  /* bpf_link_free is guaranteed to be called from process context */
  static void bpf_link_free(struct bpf_link *link)
  {
+       bpf_link_free_id(link->id);
         if (link->prog) {
                 /* detach BPF program, clean up used resources */
                 link->ops->release(link);
@@ -2275,9 +2291,11 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
         bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
         seq_printf(m,
                    "link_type:\t%s\n"
+                  "link_id:\t%u\n"
                    "prog_tag:\t%s\n"
                    "prog_id:\t%u\n",
                    link_type,
+                  link->id,
                    prog_tag,
                    prog->aux->id);
  }
@@ -2292,36 +2310,76 @@ static const struct file_operations bpf_link_fops = {
         .write          = bpf_dummy_write,
  };
  
-int bpf_link_new_fd(struct bpf_link *link)
+static int bpf_link_alloc_id(struct bpf_link *link)
  {
-       return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
-}
+       int id;
+
+       idr_preload(GFP_KERNEL);
+       spin_lock_bh(&link_idr_lock);
+       id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
+       spin_unlock_bh(&link_idr_lock);
+       idr_preload_end();
  
-/* Similar to bpf_link_new_fd, create anon_inode for given bpf_link, but
- * instead of immediately installing fd in fdtable, just reserve it and
- * return. Caller then need to either install it with fd_install(fd, file) or
- * release with put_unused_fd(fd).
- * This is useful for cases when bpf_link attachment/detachment are
- * complicated and expensive operations and should be delayed until all the fd
- * reservation and anon_inode creation succeeds.
+       return id;
+}
+
+/* Prepare bpf_link to be exposed to user-space by allocating anon_inode file,
+ * reserving unused FD and allocating ID from link_idr. This is to be paired
+ * with bpf_link_settle() to install FD and ID and expose bpf_link to
+ * user-space, if bpf_link is successfully attached. If not, bpf_link and
+ * pre-allocated resources are to be freed with bpf_cleanup() call. All the
+ * transient state is passed around in struct bpf_link_primer.
+ * This is preferred way to create and initialize bpf_link, especially when
+ * there are complicated and expensive operations inbetween creating bpf_link
+ * itself and attaching it to BPF hook. By using bpf_link_prime() and
+ * bpf_link_settle() kernel code using bpf_link doesn't have to perform
+ * expensive (and potentially failing) roll back operations in a rare case
+ * that file, FD, or ID can't be allocated.
   */
-struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd)
+int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
  {
         struct file *file;
-       int fd;
+       int fd, id;
  
         fd = get_unused_fd_flags(O_CLOEXEC);
         if (fd < 0)
-               return ERR_PTR(fd);
+               return fd;
  
         file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
         if (IS_ERR(file)) {
                 put_unused_fd(fd);
-               return file;
+               return PTR_ERR(file);
         }
  
-       *reserved_fd = fd;
-       return file;
+       id = bpf_link_alloc_id(link);
+       if (id < 0) {
+               put_unused_fd(fd);
+               fput(file);
+               return id;
+       }
+
+       primer->link = link;
+       primer->file = file;
+       primer->fd = fd;
+       primer->id = id;
+       return 0;
+}
+
+int bpf_link_settle(struct bpf_link_primer *primer)
+{
+       /* make bpf_link fetchable by ID */
+       spin_lock_bh(&link_idr_lock);
+       primer->link->id = primer->id;
+       spin_unlock_bh(&link_idr_lock);
+       /* make bpf_link fetchable by FD */
+       fd_install(primer->fd, primer->file);
+       /* pass through installed FD */
+       return primer->fd;
+}
+
+int bpf_link_new_fd(struct bpf_link *link)
+{
+       return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
  }
  
  struct bpf_link *bpf_link_get_from_fd(u32 ufd)
@@ -2367,9 +2425,9 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
  
  static int bpf_tracing_prog_attach(struct bpf_prog *prog)
  {
+       struct bpf_link_primer link_primer;
         struct bpf_tracing_link *link;
-       struct file *link_file;
-       int link_fd, err;
+       int err;
  
         switch (prog->type) {
         case BPF_PROG_TYPE_TRACING:
@@ -2404,22 +2462,19 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
         }
         bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
  
-       link_file = bpf_link_new_file(&link->link, &link_fd);
-       if (IS_ERR(link_file)) {
+       err = bpf_link_prime(&link->link, &link_primer);
+       if (err) {
                 kfree(link);
-               err = PTR_ERR(link_file);
                 goto out_put_prog;
         }
  
         err = bpf_trampoline_link_prog(prog);
         if (err) {
-               bpf_link_cleanup(&link->link, link_file, link_fd);
+               bpf_link_cleanup(&link_primer);
                 goto out_put_prog;
         }
  
-       fd_install(link_fd, link_file);
-       return link_fd;
-
+       return bpf_link_settle(&link_primer);
  out_put_prog:
         bpf_prog_put(prog);
         return err;
@@ -2447,7 +2502,7 @@ static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
         kfree(raw_tp);
  }
  
-static const struct bpf_link_ops bpf_raw_tp_lops = {
+static const struct bpf_link_ops bpf_raw_tp_link_lops = {
         .release = bpf_raw_tp_link_release,
         .dealloc = bpf_raw_tp_link_dealloc,
  };
@@ -2456,13 +2511,13 @@ static const struct bpf_link_ops bpf_raw_tp_lops = {
  
  static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
  {
+       struct bpf_link_primer link_primer;
         struct bpf_raw_tp_link *link;
         struct bpf_raw_event_map *btp;
-       struct file *link_file;
         struct bpf_prog *prog;
         const char *tp_name;
         char buf[128];
-       int link_fd, err;
+       int err;
  
         if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
                 return -EINVAL;
@@ -2515,24 +2570,22 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
                 err = -ENOMEM;
                 goto out_put_btp;
         }
-       bpf_link_init(&link->link, &bpf_raw_tp_lops, prog);
+       bpf_link_init(&link->link, &bpf_raw_tp_link_lops, prog);
         link->btp = btp;
  
-       link_file = bpf_link_new_file(&link->link, &link_fd);
-       if (IS_ERR(link_file)) {
+       err = bpf_link_prime(&link->link, &link_primer);
+       if (err) {
                 kfree(link);
-               err = PTR_ERR(link_file);
                 goto out_put_btp;
         }
  
         err = bpf_probe_register(link->btp, prog);
         if (err) {
-               bpf_link_cleanup(&link->link, link_file, link_fd);
+               bpf_link_cleanup(&link_primer);
                 goto out_put_btp;
         }
  
-       fd_install(link_fd, link_file);
-       return link_fd;
+       return bpf_link_settle(&link_primer);
  
  out_put_btp:
         bpf_put_raw_tracepoint(btp);
@@ -3464,7 +3517,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
         if (file->f_op == &bpf_link_fops) {
                 struct bpf_link *link = file->private_data;
  
-               if (link->ops == &bpf_raw_tp_lops) {
+               if (link->ops == &bpf_raw_tp_link_lops) {
                         struct bpf_raw_tp_link *raw_tp =
                                 container_of(link, struct bpf_raw_tp_link, link);
                         struct bpf_raw_event_map *btp = raw_tp->btp;
author	Andrii Nakryiko <andriin@fb.com>
	Wed, 29 Apr 2020 00:16:06 +0000 (17:16 -0700)
committer	Alexei Starovoitov <ast@kernel.org>
	Wed, 29 Apr 2020 00:27:08 +0000 (17:27 -0700)
include/linux/bpf.h		patch \| blob \| history
include/uapi/linux/bpf.h		patch \| blob \| history
kernel/bpf/cgroup.c		patch \| blob \| history
kernel/bpf/syscall.c		patch \| blob \| history