/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BPF_CGROUP_H
#define _BPF_CGROUP_H

#include <linux/errno.h>
#include <linux/jump_label.h>
#include <linux/percpu.h>
#include <linux/percpu-refcount.h>
#include <linux/rbtree.h>
#include <uapi/linux/bpf.h>

struct sock;
struct sockaddr;
struct cgroup;
struct sk_buff;
struct bpf_map;
struct bpf_prog;
struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
struct ctl_table;
struct ctl_table_header;

#ifdef CONFIG_CGROUP_BPF
enum cgroup_bpf_attach_type {
	CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
	CGROUP_INET_INGRESS = 0,
	CGROUP_INET_EGRESS,
	CGROUP_INET_SOCK_CREATE,
	CGROUP_SOCK_OPS,
	CGROUP_DEVICE,
	CGROUP_INET4_BIND,
	CGROUP_INET6_BIND,
	CGROUP_INET4_CONNECT,
	CGROUP_INET6_CONNECT,
	CGROUP_INET4_POST_BIND,
	CGROUP_INET6_POST_BIND,
	CGROUP_UDP4_SENDMSG,
	CGROUP_UDP6_SENDMSG,
	CGROUP_SYSCTL,
	CGROUP_UDP4_RECVMSG,
	CGROUP_UDP6_RECVMSG,
	CGROUP_GETSOCKOPT,
	CGROUP_SETSOCKOPT,
	CGROUP_INET4_GETPEERNAME,
	CGROUP_INET6_GETPEERNAME,
	CGROUP_INET4_GETSOCKNAME,
	CGROUP_INET6_GETSOCKNAME,
	CGROUP_INET_SOCK_RELEASE,
	MAX_CGROUP_BPF_ATTACH_TYPE
};

#define CGROUP_ATYPE(type) \
	case BPF_##type: return type

static inline enum cgroup_bpf_attach_type
to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
{
	switch (attach_type) {
	CGROUP_ATYPE(CGROUP_INET_INGRESS);
	CGROUP_ATYPE(CGROUP_INET_EGRESS);
	CGROUP_ATYPE(CGROUP_INET_SOCK_CREATE);
	CGROUP_ATYPE(CGROUP_SOCK_OPS);
	CGROUP_ATYPE(CGROUP_DEVICE);
	CGROUP_ATYPE(CGROUP_INET4_BIND);
	CGROUP_ATYPE(CGROUP_INET6_BIND);
	CGROUP_ATYPE(CGROUP_INET4_CONNECT);
	CGROUP_ATYPE(CGROUP_INET6_CONNECT);
	CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
	CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
	CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
	CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
	CGROUP_ATYPE(CGROUP_SYSCTL);
	CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
	CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
	CGROUP_ATYPE(CGROUP_GETSOCKOPT);
	CGROUP_ATYPE(CGROUP_SETSOCKOPT);
	CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
	CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
	CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
	CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
	CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
	default:
		return CGROUP_BPF_ATTACH_TYPE_INVALID;
	}
}

#undef CGROUP_ATYPE

extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(atype) static_branch_unlikely(&cgroup_bpf_enabled_key[atype])
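
/* Illustrative sketch (not from the original header): a hypothetical helper
 * showing how the two definitions above compose -- translate the UAPI
 * bpf_attach_type into the internal per-cgroup index, then consult the
 * per-type static key so the hook is essentially free when nothing is attached:
 *
 *	static inline bool my_cgroup_hook_wanted(enum bpf_attach_type type)
 *	{
 *		enum cgroup_bpf_attach_type atype;
 *
 *		atype = to_cgroup_bpf_attach_type(type);
 *		if (atype == CGROUP_BPF_ATTACH_TYPE_INVALID)
 *			return false;
 *		return cgroup_bpf_enabled(atype);
 *	}
 *
 * my_cgroup_hook_wanted() is only an example name; in-tree callers open-code
 * this pattern inside the BPF_CGROUP_RUN_*() macros further down.
 */
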
#define for_each_cgroup_storage_type(stype) \
	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)

struct bpf_cgroup_storage_map;

struct bpf_storage_buffer {
	struct rcu_head rcu;
	char data[];
};

struct bpf_cgroup_storage {
	union {
		struct bpf_storage_buffer *buf;
		void __percpu *percpu_buf;
	};
	struct bpf_cgroup_storage_map *map;
	struct bpf_cgroup_storage_key key;
	struct list_head list_map;
	struct list_head list_cg;
	struct rb_node node;
	struct rcu_head rcu;
};

struct bpf_cgroup_link {
	struct bpf_link link;
	struct cgroup *cgroup;
	enum bpf_attach_type type;
};

struct bpf_prog_list {
	struct list_head node;
	struct bpf_prog *prog;
	struct bpf_cgroup_link *link;
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};

struct bpf_prog_array;

struct cgroup_bpf {
	/* array of effective progs in this cgroup */
	struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE];

	/* attached progs to this cgroup and attach flags
	 * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
	 * have either zero or one element
	 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
	 */
	struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
	u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];

	/* list of cgroup shared storages */
	struct list_head storages;

	/* temp storage for effective prog array used by prog_attach/detach */
	struct bpf_prog_array *inactive;

	/* reference counter used to detach bpf programs after cgroup removal */
	struct percpu_ref refcnt;

	/* cgroup_bpf is released using a work queue */
	struct work_struct release_work;
};
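
/* Illustrative sketch (an assumption, not part of this header): the
 * progs[]/flags[] semantics above are what user space sees through the
 * bpf(BPF_PROG_ATTACH) command, e.g. with libbpf's bpf_prog_attach():
 *
 *	// exclusive attachment: at most one prog; descendants may override
 *	// only if BPF_F_ALLOW_OVERRIDE is passed instead of 0
 *	bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
 *
 *	// multi attachment: up to BPF_CGROUP_MAX_PROGS progs on the list
 *	bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS,
 *			BPF_F_ALLOW_MULTI);
 *
 * Attaching with a flag that conflicts with what is already recorded in
 * flags[atype] is expected to fail in __cgroup_bpf_attach().
 */
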
int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);

int __cgroup_bpf_attach(struct cgroup *cgrp,
			struct bpf_prog *prog, struct bpf_prog *replace_prog,
			struct bpf_cgroup_link *link,
			enum bpf_attach_type type, u32 flags);
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			struct bpf_cgroup_link *link,
			enum bpf_attach_type type);
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		       union bpf_attr __user *uattr);

/* Wrappers for __cgroup_bpf_*() protected by cgroup_mutex */
int cgroup_bpf_attach(struct cgroup *cgrp,
		      struct bpf_prog *prog, struct bpf_prog *replace_prog,
		      struct bpf_cgroup_link *link, enum bpf_attach_type type,
		      u32 flags);
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
		      enum bpf_attach_type type);
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		     union bpf_attr __user *uattr);

int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum cgroup_bpf_attach_type atype);

int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum cgroup_bpf_attach_type atype);

int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				      struct sockaddr *uaddr,
				      enum cgroup_bpf_attach_type atype,
				      void *t_ctx,
				      u32 *flags);

int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				     struct bpf_sock_ops_kern *sock_ops,
				     enum cgroup_bpf_attach_type atype);

int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum cgroup_bpf_attach_type atype);

int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
				   struct ctl_table *table, int write,
				   char **buf, size_t *pcount, loff_t *ppos,
				   enum cgroup_bpf_attach_type atype);

int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
				       int *optname, char __user *optval,
				       int *optlen, char **kernel_optval);

int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
				       int optname, char __user *optval,
				       int __user *optlen, int max_optlen,
				       int retval);

int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
					    int optname, void *optval,
					    int *optlen, int retval);

static inline enum bpf_cgroup_storage_type cgroup_storage_type(
	struct bpf_map *map)
{
	if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
		return BPF_CGROUP_STORAGE_PERCPU;

	return BPF_CGROUP_STORAGE_SHARED;
}

struct bpf_cgroup_storage *
cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
		      void *key, bool locked);
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
					enum bpf_cgroup_storage_type stype);
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type);
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);

int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
				     void *value, u64 flags);

/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(CGROUP_INET_INGRESS)) \
		__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
						    CGROUP_INET_INGRESS); \
	__ret; \
})
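
/* Minimal usage sketch (assumption about the call sites, not part of this
 * header): receive-path callers treat a non-zero result as a drop, roughly
 *
 *	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
 *	if (err)
 *		return err;
 *
 * When no program is attached for CGROUP_INET_INGRESS, the static key keeps
 * the whole block a near-free branch.
 */
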
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
		typeof(sk) __sk = sk_to_full_sk(sk); \
		if (sk_fullsock(__sk)) \
			__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
							    CGROUP_INET_EGRESS); \
	} \
	__ret; \
})

#define BPF_CGROUP_RUN_SK_PROG(sk, atype) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(atype)) { \
		__ret = __cgroup_bpf_run_filter_sk(sk, atype); \
	} \
	__ret; \
})

#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET_SOCK_CREATE)

#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET_SOCK_RELEASE)

#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET4_POST_BIND)

#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
	BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND)

#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \
({ \
	u32 __unused_flags; \
	int __ret = 0; \
	if (cgroup_bpf_enabled(atype)) \
		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
							  NULL, \
							  &__unused_flags); \
	__ret; \
})

#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \
({ \
	u32 __unused_flags; \
	int __ret = 0; \
	if (cgroup_bpf_enabled(atype)) { \
		lock_sock(sk); \
		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
							  t_ctx, \
							  &__unused_flags); \
		release_sock(sk); \
	} \
	__ret; \
})

/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
 * via upper bits of return code. The only flag that is supported
 * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
 * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
 */
#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \
({ \
	u32 __flags = 0; \
	int __ret = 0; \
	if (cgroup_bpf_enabled(atype)) { \
		lock_sock(sk); \
		__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
							  NULL, &__flags); \
		release_sock(sk); \
		if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
			*bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \
	} \
	__ret; \
})
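
/* Illustrative sketch (assumption): a bind() path consuming the reported
 * flag might look like
 *
 *	u32 bind_flags = 0;
 *	int err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
 *						     CGROUP_INET4_BIND,
 *						     &bind_flags);
 *	if (err)
 *		return err;
 *	if (port && port < PROT_SOCK &&
 *	    !(bind_flags & BIND_NO_CAP_NET_BIND_SERVICE) &&
 *	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
 *		return -EACCES;
 *
 * i.e. the BPF program can only waive the CAP_NET_BIND_SERVICE check for
 * privileged ports; it cannot grant any other capability.
 */
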
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \
	((cgroup_bpf_enabled(CGROUP_INET4_CONNECT) || \
	  cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \
	 (sk)->sk_prot->pre_connect)

#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT)

#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT)

#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL)

#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL)

#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx)

#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx)

#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL)

#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
	BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL)

/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
 * fullsock and its parent fullsock cannot be traced by
 * sk_to_full_sk().
 *
 * e.g. sock_ops->sk is a request_sock and it is under syncookie mode.
 * Its listener-sk is not attached to the rsk_listener.
 * In this case, the caller holds the listener-sk (unlocked),
 * set its sock_ops->sk to req_sk, and call this SOCK_OPS"_SK" with
 * the listener-sk such that the cgroup-bpf-progs of the
 * listener-sk will be run.
 *
 * Regardless of syncookie mode or not,
 * calling bpf_setsockopt on listener-sk will not make sense anyway,
 * so passing 'sock_ops->sk == req_sk' to the bpf prog is appropriate here.
 */
#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(CGROUP_SOCK_OPS)) \
		__ret = __cgroup_bpf_run_filter_sock_ops(sk, \
							 sock_ops, \
							 CGROUP_SOCK_OPS); \
	__ret; \
})
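
/* Usage sketch (assumption): the syncookie case described above roughly
 * amounts to
 *
 *	sock_ops.sk = req_sk;				// not a fullsock
 *	BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(&sock_ops, listener_sk);
 *
 * so the effective program array of the (unlocked) listener socket's cgroup
 * is run while the BPF program still sees the request socket as its context.
 */
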
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) && (sock_ops)->sk) { \
		typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \
		if (__sk && sk_fullsock(__sk)) \
			__ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
								 sock_ops, \
								 CGROUP_SOCK_OPS); \
	} \
	__ret; \
})

#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(CGROUP_DEVICE)) \
		__ret = __cgroup_bpf_check_dev_permission(atype, major, minor, \
							  access, CGROUP_DEVICE); \
	__ret; \
})

#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(CGROUP_SYSCTL)) \
		__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
						       buf, count, pos, \
						       CGROUP_SYSCTL); \
	__ret; \
})

#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
				       kernel_optval) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT)) \
		__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
							   optname, optval, \
							   optlen, \
							   kernel_optval); \
	__ret; \
})

#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
({ \
	int __ret = 0; \
	if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
		get_user(__ret, optlen); \
	__ret; \
})
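
/* Usage sketch (assumption about the call site): the two getsockopt macros
 * are meant to bracket the protocol's own handler, roughly
 *
 *	int max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
 *
 *	err = sock->ops->getsockopt(sock, level, optname, optval, optlen);
 *	err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, optval,
 *					     optlen, max_optlen, err);
 *
 * BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN() snapshots the user-supplied length
 * (or evaluates to 0 when no CGROUP_GETSOCKOPT program is attached), and the
 * RUN macro receives the kernel's result as 'retval' so BPF can pass it
 * through or override the returned value and error code.
 */
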
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \
				       max_optlen, retval) \
({ \
	int __ret = retval; \
	if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
		if (!(sock)->sk_prot->bpf_bypass_getsockopt || \
		    !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
					  tcp_bpf_bypass_getsockopt, \
					  level, optname)) \
			__ret = __cgroup_bpf_run_filter_getsockopt( \
				sock, level, optname, optval, optlen, \
				max_optlen, retval); \
	__ret; \
})

#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
					    optlen, retval) \
({ \
	int __ret = retval; \
	if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
		__ret = __cgroup_bpf_run_filter_getsockopt_kern( \
			sock, level, optname, optval, optlen, retval); \
	__ret; \
})

int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype);
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int cgroup_bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr);

#else /* CONFIG_CGROUP_BPF */

struct cgroup_bpf {};
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}

static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
					 enum bpf_prog_type ptype,
					 struct bpf_prog *prog)
{
	return -EINVAL;
}

static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
					 enum bpf_prog_type ptype)
{
	return -EINVAL;
}

static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
					 struct bpf_prog *prog)
{
	return -EINVAL;
}

static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
					union bpf_attr __user *uattr)
{
	return -EINVAL;
}

static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
					    struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; }
static inline void bpf_cgroup_storage_free(
	struct bpf_cgroup_storage *storage) {}
static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
						 void *value) {
	return 0;
}
static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
					void *key, void *value, u64 flags) {
	return 0;
}

#define cgroup_bpf_enabled(atype) (0)
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) ({ 0; })
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
				       optlen, max_optlen, retval) ({ retval; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
					    optlen, retval) ({ retval; })
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
				       kernel_optval) ({ 0; })

#define for_each_cgroup_storage_type(stype) for (; false; )

#endif /* CONFIG_CGROUP_BPF */

#endif /* _BPF_CGROUP_H */