bpf: Introduce opaque bpf_refcount struct and add btf_record plumbing
author Dave Marchevsky <davemarchevsky@fb.com>
Sat, 15 Apr 2023 20:18:04 +0000 (13:18 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Sun, 16 Apr 2023 00:36:49 +0000 (17:36 -0700)
A 'struct bpf_refcount' is added to the set of opaque uapi/bpf.h types
meant for use in BPF programs. Similarly to other opaque types like
bpf_spin_lock and bpf_rb_node, the verifier needs to know where in
user-defined struct types a bpf_refcount can be located, so the necessary
btf_record plumbing is added to enable this. bpf_refcount is sized to
hold a refcount_t.

Similarly to bpf_spin_lock, the offset of a bpf_refcount is cached in
btf_record as refcount_off in addition to being in the field array.
Caching refcount_off makes sense for this field because further patches
in the series will modify functions that take local kptrs (e.g.
bpf_obj_drop) to change their behavior if the type they're operating on
is refcounted. So enabling fast "is this type refcounted?" checks is
desirable.

No such verifier behavior changes are introduced in this patch, just
logic to recognize 'struct bpf_refcount' in btf_record.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230415201811.343116-3-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/linux/bpf.h
include/uapi/linux/bpf.h
kernel/bpf/btf.c
kernel/bpf/syscall.c
tools/include/uapi/linux/bpf.h

index 7888ed497432906f11d3d75c1bf704c2d69109ee..be44d765b7a493231f4f1137c39531f7796f2e9d 100644 (file)
@@ -187,6 +187,7 @@ enum btf_field_type {
        BPF_RB_NODE    = (1 << 7),
        BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
                                 BPF_RB_NODE | BPF_RB_ROOT,
+       BPF_REFCOUNT   = (1 << 8),
 };
 
 typedef void (*btf_dtor_kfunc_t)(void *);
@@ -223,6 +224,7 @@ struct btf_record {
        u32 field_mask;
        int spin_lock_off;
        int timer_off;
+       int refcount_off;
        struct btf_field fields[];
 };
 
@@ -293,6 +295,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
                return "bpf_rb_root";
        case BPF_RB_NODE:
                return "bpf_rb_node";
+       case BPF_REFCOUNT:
+               return "bpf_refcount";
        default:
                WARN_ON_ONCE(1);
                return "unknown";
@@ -317,6 +321,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
                return sizeof(struct bpf_rb_root);
        case BPF_RB_NODE:
                return sizeof(struct bpf_rb_node);
+       case BPF_REFCOUNT:
+               return sizeof(struct bpf_refcount);
        default:
                WARN_ON_ONCE(1);
                return 0;
@@ -341,6 +347,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
                return __alignof__(struct bpf_rb_root);
        case BPF_RB_NODE:
                return __alignof__(struct bpf_rb_node);
+       case BPF_REFCOUNT:
+               return __alignof__(struct bpf_refcount);
        default:
                WARN_ON_ONCE(1);
                return 0;
index 3823100b793407acfc742b3e1a5a61c9aad1c738..4b20a7269beeae45ad6dc679fc6677bb441b6d97 100644 (file)
@@ -6985,6 +6985,10 @@ struct bpf_rb_node {
        __u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_refcount {
+       __u32 :32;
+} __attribute__((aligned(4)));
+
 struct bpf_sysctl {
        __u32   write;          /* Sysctl is being read (= 0) or written (= 1).
                                 * Allows 1,2,4-byte read, but no write.
index f3c998feeccbbd1f8697e7f32100b841b4915b65..14889fd5ba8e38b8f0e763f8524c3ecc1dfc279d 100644 (file)
@@ -3391,6 +3391,7 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
        field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
        field_mask_test_name(BPF_RB_ROOT,   "bpf_rb_root");
        field_mask_test_name(BPF_RB_NODE,   "bpf_rb_node");
+       field_mask_test_name(BPF_REFCOUNT,  "bpf_refcount");
 
        /* Only return BPF_KPTR when all other types with matchable names fail */
        if (field_mask & BPF_KPTR) {
@@ -3439,6 +3440,7 @@ static int btf_find_struct_field(const struct btf *btf,
                case BPF_TIMER:
                case BPF_LIST_NODE:
                case BPF_RB_NODE:
+               case BPF_REFCOUNT:
                        ret = btf_find_struct(btf, member_type, off, sz, field_type,
                                              idx < info_cnt ? &info[idx] : &tmp);
                        if (ret < 0)
@@ -3504,6 +3506,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
                case BPF_TIMER:
                case BPF_LIST_NODE:
                case BPF_RB_NODE:
+               case BPF_REFCOUNT:
                        ret = btf_find_struct(btf, var_type, off, sz, field_type,
                                              idx < info_cnt ? &info[idx] : &tmp);
                        if (ret < 0)
@@ -3734,6 +3737,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 
        rec->spin_lock_off = -EINVAL;
        rec->timer_off = -EINVAL;
+       rec->refcount_off = -EINVAL;
        for (i = 0; i < cnt; i++) {
                field_type_size = btf_field_type_size(info_arr[i].type);
                if (info_arr[i].off + field_type_size > value_size) {
@@ -3763,6 +3767,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
                        /* Cache offset for faster lookup at runtime */
                        rec->timer_off = rec->fields[i].offset;
                        break;
+               case BPF_REFCOUNT:
+                       WARN_ON_ONCE(rec->refcount_off >= 0);
+                       /* Cache offset for faster lookup at runtime */
+                       rec->refcount_off = rec->fields[i].offset;
+                       break;
                case BPF_KPTR_UNREF:
                case BPF_KPTR_REF:
                        ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
@@ -5308,6 +5317,7 @@ static const char *alloc_obj_fields[] = {
        "bpf_list_node",
        "bpf_rb_root",
        "bpf_rb_node",
+       "bpf_refcount",
 };
 
 static struct btf_struct_metas *
@@ -5381,7 +5391,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
                type = &tab->types[tab->cnt];
                type->btf_id = i;
                record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
-                                                 BPF_RB_ROOT | BPF_RB_NODE, t->size);
+                                                 BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
                /* The record cannot be unset, treat it as an error if so */
                if (IS_ERR_OR_NULL(record)) {
                        ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
index c08b7933bf8fe51e1fd9982ad52c71c5efa8b17d..28eac7434d32b1fa32194ac6852877a0a499894b 100644 (file)
@@ -552,6 +552,7 @@ void btf_record_free(struct btf_record *rec)
                case BPF_RB_NODE:
                case BPF_SPIN_LOCK:
                case BPF_TIMER:
+               case BPF_REFCOUNT:
                        /* Nothing to release */
                        break;
                default:
@@ -599,6 +600,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
                case BPF_RB_NODE:
                case BPF_SPIN_LOCK:
                case BPF_TIMER:
+               case BPF_REFCOUNT:
                        /* Nothing to acquire */
                        break;
                default:
@@ -705,6 +707,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
                        break;
                case BPF_LIST_NODE:
                case BPF_RB_NODE:
+               case BPF_REFCOUNT:
                        break;
                default:
                        WARN_ON_ONCE(1);
@@ -1032,7 +1035,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 
        map->record = btf_parse_fields(btf, value_type,
                                       BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
-                                      BPF_RB_ROOT,
+                                      BPF_RB_ROOT | BPF_REFCOUNT,
                                       map->value_size);
        if (!IS_ERR_OR_NULL(map->record)) {
                int i;
@@ -1071,6 +1074,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
                                break;
                        case BPF_KPTR_UNREF:
                        case BPF_KPTR_REF:
+                       case BPF_REFCOUNT:
                                if (map->map_type != BPF_MAP_TYPE_HASH &&
                                    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
                                    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
index 3823100b793407acfc742b3e1a5a61c9aad1c738..4b20a7269beeae45ad6dc679fc6677bb441b6d97 100644 (file)
@@ -6985,6 +6985,10 @@ struct bpf_rb_node {
        __u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_refcount {
+       __u32 :32;
+} __attribute__((aligned(4)));
+
 struct bpf_sysctl {
        __u32   write;          /* Sysctl is being read (= 0) or written (= 1).
                                 * Allows 1,2,4-byte read, but no write.