bpf: Change uapi for bpf iterator map elements
authorYonghong Song <yhs@fb.com>
Wed, 5 Aug 2020 05:50:56 +0000 (22:50 -0700)
committerAlexei Starovoitov <ast@kernel.org>
Thu, 6 Aug 2020 23:39:14 +0000 (16:39 -0700)
Commit a5cbe05a6673 ("bpf: Implement bpf iterator for
map elements") added bpf iterator support for
map elements. The map element bpf iterator requires
info to identify a particular map. In the above
commit, the attr->link_create.target_fd is used
to carry map_fd and an enum bpf_iter_link_info
is added to uapi to specify the target_fd actually
representing a map_fd:
    enum bpf_iter_link_info {
BPF_ITER_LINK_UNSPEC = 0,
BPF_ITER_LINK_MAP_FD = 1,

MAX_BPF_ITER_LINK_INFO,
    };

This is an extensible approach as we can grow
enumerator for pid, cgroup_id, etc. and we can
unionize target_fd for pid, cgroup_id, etc.
But in the future, there are chances that
more complex customization may happen, e.g.,
for tasks, it could be filtered based on
both cgroup_id and user_id.

This patch changed the uapi to have fields
__aligned_u64 iter_info;
__u32 iter_info_len;
for additional iter_info for link_create.
The iter_info is defined as
union bpf_iter_link_info {
struct {
__u32   map_fd;
} map;
};

So future extension for additional customization
will be easier. The bpf_iter_link_info will be
passed to target callback to validate and generic
bpf_iter framework does not need to deal it any
more.

Note that map_fd = 0 will be considered invalid
and -EBADF will be returned to user space.

Fixes: a5cbe05a6673 ("bpf: Implement bpf iterator for map elements")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200805055056.1457463-1-yhs@fb.com
include/linux/bpf.h
include/uapi/linux/bpf.h
kernel/bpf/bpf_iter.c
kernel/bpf/map_iter.c
kernel/bpf/syscall.c
net/core/bpf_sk_storage.c

index cef4ef0..55f694b 100644 (file)
@@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info {
        struct bpf_map *map;
 };
 
-typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog,
-                                      struct bpf_iter_aux_info *aux);
+typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
+                                       union bpf_iter_link_info *linfo,
+                                       struct bpf_iter_aux_info *aux);
+typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);
 
 #define BPF_ITER_CTX_ARG_MAX 2
 struct bpf_iter_reg {
        const char *target;
-       bpf_iter_check_target_t check_target;
+       bpf_iter_attach_target_t attach_target;
+       bpf_iter_detach_target_t detach_target;
        u32 ctx_arg_info_size;
-       enum bpf_iter_link_info req_linfo;
        struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
        const struct bpf_iter_seq_info *seq_info;
 };
index b134e67..0480f89 100644 (file)
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
        __u32   attach_type;            /* program attach type */
 };
 
+union bpf_iter_link_info {
+       struct {
+               __u32   map_fd;
+       } map;
+};
+
 /* BPF syscall commands, see bpf(2) man-page for details. */
 enum bpf_cmd {
        BPF_MAP_CREATE,
@@ -249,13 +255,6 @@ enum bpf_link_type {
        MAX_BPF_LINK_TYPE,
 };
 
-enum bpf_iter_link_info {
-       BPF_ITER_LINK_UNSPEC = 0,
-       BPF_ITER_LINK_MAP_FD = 1,
-
-       MAX_BPF_ITER_LINK_INFO,
-};
-
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@ union bpf_attr {
                };
                __u32           attach_type;    /* attach type */
                __u32           flags;          /* extra flags */
+               __aligned_u64   iter_info;      /* extra bpf_iter_link_info */
+               __u32           iter_info_len;  /* iter_info length */
        } link_create;
 
        struct { /* struct used by BPF_LINK_UPDATE command */
index 363b9ca..b671596 100644 (file)
@@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link)
        struct bpf_iter_link *iter_link =
                container_of(link, struct bpf_iter_link, link);
 
-       if (iter_link->aux.map)
-               bpf_map_put_with_uref(iter_link->aux.map);
+       if (iter_link->tinfo->reg_info->detach_target)
+               iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
 }
 
 static void bpf_iter_link_dealloc(struct bpf_link *link)
@@ -390,15 +390,35 @@ bool bpf_link_is_iter(struct bpf_link *link)
 
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
+       union bpf_iter_link_info __user *ulinfo;
        struct bpf_link_primer link_primer;
        struct bpf_iter_target_info *tinfo;
-       struct bpf_iter_aux_info aux = {};
+       union bpf_iter_link_info linfo;
        struct bpf_iter_link *link;
-       u32 prog_btf_id, target_fd;
+       u32 prog_btf_id, linfo_len;
        bool existed = false;
-       struct bpf_map *map;
        int err;
 
+       if (attr->link_create.target_fd || attr->link_create.flags)
+               return -EINVAL;
+
+       memset(&linfo, 0, sizeof(union bpf_iter_link_info));
+
+       ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
+       linfo_len = attr->link_create.iter_info_len;
+       if (!ulinfo ^ !linfo_len)
+               return -EINVAL;
+
+       if (ulinfo) {
+               err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
+                                              linfo_len);
+               if (err)
+                       return err;
+               linfo_len = min_t(u32, linfo_len, sizeof(linfo));
+               if (copy_from_user(&linfo, ulinfo, linfo_len))
+                       return -EFAULT;
+       }
+
        prog_btf_id = prog->aux->attach_btf_id;
        mutex_lock(&targets_mutex);
        list_for_each_entry(tinfo, &targets, list) {
@@ -411,13 +431,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
        if (!existed)
                return -ENOENT;
 
-       /* Make sure user supplied flags are target expected. */
-       target_fd = attr->link_create.target_fd;
-       if (attr->link_create.flags != tinfo->reg_info->req_linfo)
-               return -EINVAL;
-       if (!attr->link_create.flags && target_fd)
-               return -EINVAL;
-
        link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
        if (!link)
                return -ENOMEM;
@@ -431,28 +444,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
                return err;
        }
 
-       if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) {
-               map = bpf_map_get_with_uref(target_fd);
-               if (IS_ERR(map)) {
-                       err = PTR_ERR(map);
-                       goto cleanup_link;
-               }
-
-               aux.map = map;
-               err = tinfo->reg_info->check_target(prog, &aux);
+       if (tinfo->reg_info->attach_target) {
+               err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
                if (err) {
-                       bpf_map_put_with_uref(map);
-                       goto cleanup_link;
+                       bpf_link_cleanup(&link_primer);
+                       return err;
                }
-
-               link->aux.map = map;
        }
 
        return bpf_link_settle(&link_primer);
-
-cleanup_link:
-       bpf_link_cleanup(&link_primer);
-       return err;
 }
 
 static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
index fbe1f55..af86048 100644 (file)
@@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = {
        .seq_info               = &bpf_map_seq_info,
 };
 
-static int bpf_iter_check_map(struct bpf_prog *prog,
-                             struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+                              union bpf_iter_link_info *linfo,
+                              struct bpf_iter_aux_info *aux)
 {
        u32 key_acc_size, value_acc_size, key_size, value_size;
-       struct bpf_map *map = aux->map;
+       struct bpf_map *map;
        bool is_percpu = false;
+       int err = -EINVAL;
+
+       if (!linfo->map.map_fd)
+               return -EBADF;
+
+       map = bpf_map_get_with_uref(linfo->map.map_fd);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
 
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
@@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
        else if (map->map_type != BPF_MAP_TYPE_HASH &&
                 map->map_type != BPF_MAP_TYPE_LRU_HASH &&
                 map->map_type != BPF_MAP_TYPE_ARRAY)
-               return -EINVAL;
+               goto put_map;
 
        key_acc_size = prog->aux->max_rdonly_access;
        value_acc_size = prog->aux->max_rdwr_access;
@@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
        else
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
 
-       if (key_acc_size > key_size || value_acc_size > value_size)
-               return -EACCES;
+       if (key_acc_size > key_size || value_acc_size > value_size) {
+               err = -EACCES;
+               goto put_map;
+       }
 
+       aux->map = map;
        return 0;
+
+put_map:
+       bpf_map_put_with_uref(map);
+       return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+       bpf_map_put_with_uref(aux->map);
 }
 
 DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
@@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
 
 static const struct bpf_iter_reg bpf_map_elem_reg_info = {
        .target                 = "bpf_map_elem",
-       .check_target           = bpf_iter_check_map,
-       .req_linfo              = BPF_ITER_LINK_MAP_FD,
+       .attach_target          = bpf_iter_attach_map,
+       .detach_target          = bpf_iter_detach_map,
        .ctx_arg_info_size      = 2,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__bpf_map_elem, key),
index 2f343ce..86299a2 100644 (file)
@@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
        return -EINVAL;
 }
 
-#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
+#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
 static int link_create(union bpf_attr *attr)
 {
        enum bpf_prog_type ptype;
index d3377c9..b988f48 100644 (file)
@@ -1384,18 +1384,39 @@ static int bpf_iter_init_sk_storage_map(void *priv_data,
        return 0;
 }
 
-static int bpf_iter_check_map(struct bpf_prog *prog,
-                             struct bpf_iter_aux_info *aux)
+static int bpf_iter_attach_map(struct bpf_prog *prog,
+                              union bpf_iter_link_info *linfo,
+                              struct bpf_iter_aux_info *aux)
 {
-       struct bpf_map *map = aux->map;
+       struct bpf_map *map;
+       int err = -EINVAL;
+
+       if (!linfo->map.map_fd)
+               return -EBADF;
+
+       map = bpf_map_get_with_uref(linfo->map.map_fd);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
 
        if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
-               return -EINVAL;
+               goto put_map;
 
-       if (prog->aux->max_rdonly_access > map->value_size)
-               return -EACCES;
+       if (prog->aux->max_rdonly_access > map->value_size) {
+               err = -EACCES;
+               goto put_map;
+       }
 
+       aux->map = map;
        return 0;
+
+put_map:
+       bpf_map_put_with_uref(map);
+       return err;
+}
+
+static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
+{
+       bpf_map_put_with_uref(aux->map);
 }
 
 static const struct seq_operations bpf_sk_storage_map_seq_ops = {
@@ -1414,8 +1435,8 @@ static const struct bpf_iter_seq_info iter_seq_info = {
 
 static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
        .target                 = "bpf_sk_storage_map",
-       .check_target           = bpf_iter_check_map,
-       .req_linfo              = BPF_ITER_LINK_MAP_FD,
+       .attach_target          = bpf_iter_attach_map,
+       .detach_target          = bpf_iter_detach_map,
        .ctx_arg_info_size      = 2,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__bpf_sk_storage_map, sk),