/* an array of programs to be executed under rcu_lock.
*
* Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run);
+ * ret = bpf_prog_run_array(rcu_dereference(&bpf_prog_array), ctx, bpf_prog_run);
*
* the structure returned by bpf_prog_array_alloc() should be populated
* with program pointers and the last pointer must be NULL.
typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
-static __always_inline int
-BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
- const void *ctx, bpf_prog_run_fn run_prog,
- int retval, u32 *ret_flags)
-{
- const struct bpf_prog_array_item *item;
- const struct bpf_prog *prog;
- const struct bpf_prog_array *array;
- struct bpf_run_ctx *old_run_ctx;
- struct bpf_cg_run_ctx run_ctx;
- u32 func_ret;
-
- run_ctx.retval = retval;
- migrate_disable();
- rcu_read_lock();
- array = rcu_dereference(array_rcu);
- item = &array->items[0];
- old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
- while ((prog = READ_ONCE(item->prog))) {
- run_ctx.prog_item = item;
- func_ret = run_prog(prog, ctx);
- if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
- run_ctx.retval = -EPERM;
- *(ret_flags) |= (func_ret >> 1);
- item++;
- }
- bpf_reset_run_ctx(old_run_ctx);
- rcu_read_unlock();
- migrate_enable();
- return run_ctx.retval;
-}
-
-static __always_inline int
-BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
- const void *ctx, bpf_prog_run_fn run_prog,
- int retval)
-{
- const struct bpf_prog_array_item *item;
- const struct bpf_prog *prog;
- const struct bpf_prog_array *array;
- struct bpf_run_ctx *old_run_ctx;
- struct bpf_cg_run_ctx run_ctx;
-
- run_ctx.retval = retval;
- migrate_disable();
- rcu_read_lock();
- array = rcu_dereference(array_rcu);
- item = &array->items[0];
- old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
- while ((prog = READ_ONCE(item->prog))) {
- run_ctx.prog_item = item;
- if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
- run_ctx.retval = -EPERM;
- item++;
- }
- bpf_reset_run_ctx(old_run_ctx);
- rcu_read_unlock();
- migrate_enable();
- return run_ctx.retval;
-}
-
static __always_inline u32
-BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+bpf_prog_run_array(const struct bpf_prog_array *array,
const void *ctx, bpf_prog_run_fn run_prog)
{
const struct bpf_prog_array_item *item;
const struct bpf_prog *prog;
- const struct bpf_prog_array *array;
struct bpf_run_ctx *old_run_ctx;
struct bpf_trace_run_ctx run_ctx;
u32 ret = 1;
- migrate_disable();
- rcu_read_lock();
- array = rcu_dereference(array_rcu);
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held");
+
if (unlikely(!array))
- goto out;
+ return ret;
+
+ migrate_disable();
old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
item = &array->items[0];
while ((prog = READ_ONCE(item->prog))) {
item++;
}
bpf_reset_run_ctx(old_run_ctx);
-out:
- rcu_read_unlock();
migrate_enable();
return ret;
}
-/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
- * so BPF programs can request cwr for TCP packets.
- *
- * Current cgroup skb programs can only return 0 or 1 (0 to drop the
- * packet. This macro changes the behavior so the low order bit
- * indicates whether the packet should be dropped (0) or not (1)
- * and the next bit is a congestion notification bit. This could be
- * used by TCP to call tcp_enter_cwr()
- *
- * Hence, new allowed return values of CGROUP EGRESS BPF programs are:
- * 0: drop packet
- * 1: keep packet
- * 2: drop packet and cn
- * 3: keep packet and cn
- *
- * This macro then converts it to one of the NET_XMIT or an error
- * code that is then interpreted as drop packet (and no cn):
- * 0: NET_XMIT_SUCCESS skb should be transmitted
- * 1: NET_XMIT_DROP skb should be dropped and cn
- * 2: NET_XMIT_CN skb should be transmitted and cn
- * 3: -err skb should be dropped
- */
-#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
- ({ \
- u32 _flags = 0; \
- bool _cn; \
- u32 _ret; \
- _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
- _cn = _flags & BPF_RET_SET_CN; \
- if (_ret && !IS_ERR_VALUE((long)_ret)) \
- _ret = -EFAULT; \
- if (!_ret) \
- _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
- else \
- _ret = (_cn ? NET_XMIT_DROP : _ret); \
- _ret; \
- })
-
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;
DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+/* Run every program in cgrp->effective[atype] against ctx, producing a
+ * verdict plus an accumulated set of per-program flag bits.
+ *
+ * cgrp:      cgroup BPF state whose effective[atype] array is executed
+ * atype:     index into cgrp->effective[]
+ * ctx:       opaque context handed unchanged to every program
+ * run_prog:  callback used to execute a single program
+ * retval:    initial value stored in run_ctx.retval
+ * ret_flags: accumulator; (func_ret >> 1) of every program is OR-ed into
+ *            it. It is never cleared here, so the caller must initialize
+ *            it before the call.
+ *
+ * Bit 0 of each program's return value is tested: when it is clear and
+ * run_ctx.retval does not already hold an error value, run_ctx.retval is
+ * set to -EPERM. The remaining bits are shifted down by one and merged
+ * into the caller's flags word.
+ *
+ * Migration is disabled and the RCU read lock is taken inside this
+ * function, around the dereference of the array and the whole loop.
+ * The array pointer is used without a NULL check.
+ * NOTE(review): presumably effective[atype] is never NULL -- confirm.
+ *
+ * Returns the final run_ctx.retval.
+ *
+ * __always_inline is necessary to prevent indirect call through run_prog
+ * function pointer.
+ */
+static __always_inline int
+bpf_prog_run_array_cg_flags(const struct cgroup_bpf *cgrp,
+ enum cgroup_bpf_attach_type atype,
+ const void *ctx, bpf_prog_run_fn run_prog,
+ int retval, u32 *ret_flags)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_cg_run_ctx run_ctx;
+ u32 func_ret;
+
+ run_ctx.retval = retval; /* seed the verdict with the caller's value */
+ migrate_disable();
+ rcu_read_lock();
+ array = rcu_dereference(cgrp->effective[atype]);
+ item = &array->items[0];
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); /* saved for the reset below */
+ while ((prog = READ_ONCE(item->prog))) { /* a NULL prog ends the walk */
+ run_ctx.prog_item = item; /* record which item is currently running */
+ func_ret = run_prog(prog, ctx);
+ if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
+ run_ctx.retval = -EPERM; /* an existing error value is kept */
+ *(ret_flags) |= (func_ret >> 1); /* bits above bit 0 are flags */
+ item++;
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+ rcu_read_unlock();
+ migrate_enable();
+ return run_ctx.retval;
+}
+/* Run every program in cgrp->effective[atype] against ctx and return the
+ * resulting verdict.
+ *
+ * cgrp:     cgroup BPF state whose effective[atype] array is executed
+ * atype:    index into cgrp->effective[]
+ * ctx:      opaque context handed unchanged to every program
+ * run_prog: callback used to execute a single program
+ * retval:   initial value stored in run_ctx.retval
+ *
+ * When a program returns 0 and run_ctx.retval does not already hold an
+ * error value, run_ctx.retval is set to -EPERM. The loop does not stop
+ * early: every program up to the terminating NULL entry is run.
+ *
+ * Migration is disabled and the RCU read lock is taken inside this
+ * function, around the dereference of the array and the whole loop.
+ * The array pointer is used without a NULL check.
+ * NOTE(review): presumably effective[atype] is never NULL -- confirm.
+ *
+ * Returns the final run_ctx.retval.
+ */
+static __always_inline int
+bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
+ enum cgroup_bpf_attach_type atype,
+ const void *ctx, bpf_prog_run_fn run_prog,
+ int retval)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_cg_run_ctx run_ctx;
+
+ run_ctx.retval = retval; /* seed the verdict with the caller's value */
+ migrate_disable();
+ rcu_read_lock();
+ array = rcu_dereference(cgrp->effective[atype]);
+ item = &array->items[0];
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); /* saved for the reset below */
+ while ((prog = READ_ONCE(item->prog))) { /* a NULL prog ends the walk */
+ run_ctx.prog_item = item; /* record which item is currently running */
+ if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+ run_ctx.retval = -EPERM; /* an existing error value is kept */
+ item++;
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+ rcu_read_unlock();
+ migrate_enable();
+ return run_ctx.retval;
+}
+
void cgroup_bpf_offline(struct cgroup *cgrp)
{
cgroup_get(cgrp);
bpf_compute_and_save_data_end(skb, &saved_data_end);
if (atype == CGROUP_INET_EGRESS) {
- ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
- cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
+ u32 flags = 0;
+ bool cn;
+
+ ret = bpf_prog_run_array_cg_flags(
+ &cgrp->bpf, atype,
+ skb, __bpf_prog_run_save_cb, 0, &flags);
+
+ /* Return values of CGROUP EGRESS BPF programs are:
+ * 0: drop packet
+ * 1: keep packet
+ * 2: drop packet and cn
+ * 3: keep packet and cn
+ *
+ * The returned value is then converted to one of the NET_XMIT_*
+ * codes or to an error code that is then interpreted as drop
+ * packet (and no cn):
+ * 0: NET_XMIT_SUCCESS skb should be transmitted
+ * 1: NET_XMIT_DROP skb should be dropped and cn
+ * 2: NET_XMIT_CN skb should be transmitted and cn
+ * 3: -err skb should be dropped
+ */
+
+ cn = flags & BPF_RET_SET_CN;
+ if (ret && !IS_ERR_VALUE((long)ret))
+ ret = -EFAULT;
+ if (!ret)
+ ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
+ else
+ ret = (cn ? NET_XMIT_DROP : ret);
} else {
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
- __bpf_prog_run_save_cb, 0);
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
+ skb, __bpf_prog_run_save_cb, 0);
if (ret && !IS_ERR_VALUE((long)ret))
ret = -EFAULT;
}
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
- bpf_prog_run, 0);
+ return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
}
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
- bpf_prog_run, 0, flags);
+ return bpf_prog_run_array_cg_flags(&cgrp->bpf, atype,
+ &ctx, bpf_prog_run, 0, flags);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
- bpf_prog_run, 0);
+ return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
+ 0);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
- bpf_prog_run, 0);
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0);
rcu_read_unlock();
return ret;
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
- bpf_prog_run, 0);
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0);
rcu_read_unlock();
kfree(ctx.cur_val);
}
lock_sock(sk);
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
&ctx, bpf_prog_run, 0);
release_sock(sk);
}
lock_sock(sk);
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
&ctx, bpf_prog_run, retval);
release_sock(sk);
* be called if that data shouldn't be "exported".
*/
- ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+ ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
&ctx, bpf_prog_run, retval);
if (ret < 0)
return ret;