bpf: Add __bpf_prog_{enter,exit}_struct_ops for struct_ops trampoline
author		Martin KaFai Lau <martin.lau@kernel.org>
		Thu, 29 Sep 2022 07:04:03 +0000 (00:04 -0700)
committer	Alexei Starovoitov <ast@kernel.org>
		Thu, 29 Sep 2022 16:25:47 +0000 (09:25 -0700)
The struct_ops prog allows using bpf to implement the functions of
a kernel struct (eg. like a kernel module does).  The current usage
is to implement tcp_congestion_ops.  The kernel does not call the
tcp-cc's ops (ie. the bpf prog) recursively.

The struct_ops currently shares the tracing trampoline's enter/exit
functions, which track prog->active to avoid recursion.  That
tracking is needed for tracing progs.  However, it turns out a
struct_ops bpf prog can trip over this prog->active count and be
unnecessarily skipped.  eg. '.ssthresh' may run in_task() and then
be interrupted by a softirq that runs the same '.ssthresh'.
Skipping this '.ssthresh' run ends up returning a random value to
the caller.
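
For reference, the shared tracing enter helper looks roughly like
this at the time of this patch (a sketch of __bpf_prog_enter from
kernel/bpf/trampoline.c; minor details may differ by tree):

    u64 notrace __bpf_prog_enter(struct bpf_prog *prog,
                                 struct bpf_tramp_run_ctx *run_ctx)
            __acquires(RCU)
    {
            rcu_read_lock();
            migrate_disable();

            run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

            /* Already active on this cpu, eg. a softirq preempting
             * an in_task() run of the same prog: count a miss and
             * return 0 so the trampoline skips executing the prog.
             */
            if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
                    bpf_prog_inc_misses_counter(prog);
                    return 0;
            }
            return bpf_prog_start_time();
    }

The struct_ops variants added below are this minus the prog->active
check.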

The patch adds __bpf_prog_{enter,exit}_struct_ops for the
struct_ops trampoline.  They do not track prog->active to detect
recursion.

One exception is when the tcp-cc's '.init' ops does
bpf_setsockopt(TCP_CONGESTION) and then recurs into the same
'.init' ops.  This will be addressed in the following patches.
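
As an illustration only (names are hypothetical, not from this
patch), that exception is a bpf tcp-cc whose '.init' switches the
socket to the same cc, which makes the kernel call that '.init'
again:

    /* Hypothetical bpf tcp-cc fragment; the struct_ops map
     * registration is omitted for brevity.
     */
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    #define SOL_TCP		6
    #define TCP_CONGESTION	13

    char _license[] SEC("license") = "GPL";

    SEC("struct_ops/hypothetical_init")
    void BPF_PROG(hypothetical_init, struct sock *sk)
    {
            char cc[] = "hypothetical_cc";

            /* Re-setting a bpf cc from its own '.init' makes the
             * kernel call this '.init' again, ie. the recursion
             * described above.
             */
            bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION, cc, sizeof(cc));
    }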

Fixes: ca06f55b9002 ("bpf: Add per-program recursion prevention mechanism")
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20220929070407.965581-2-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
arch/x86/net/bpf_jit_comp.c
include/linux/bpf.h
kernel/bpf/trampoline.c

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 35796db..5b62307 100644
@@ -1836,6 +1836,9 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
        if (p->aux->sleepable) {
                enter = __bpf_prog_enter_sleepable;
                exit = __bpf_prog_exit_sleepable;
+       } else if (p->type == BPF_PROG_TYPE_STRUCT_OPS) {
+               enter = __bpf_prog_enter_struct_ops;
+               exit = __bpf_prog_exit_struct_ops;
        } else if (p->expected_attach_type == BPF_LSM_CGROUP) {
                enter = __bpf_prog_enter_lsm_cgroup;
                exit = __bpf_prog_exit_lsm_cgroup;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0f3eaf3..9e7d46d 100644
@@ -864,6 +864,10 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
                                        struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
                                        struct bpf_tramp_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
+                                       struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
+                                       struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
 
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 6f7b939..bf0906e 100644
@@ -964,6 +964,29 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
        rcu_read_unlock_trace();
 }
 
+u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
+                                       struct bpf_tramp_run_ctx *run_ctx)
+       __acquires(RCU)
+{
+       rcu_read_lock();
+       migrate_disable();
+
+       run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
+       return bpf_prog_start_time();
+}
+
+void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
+                                       struct bpf_tramp_run_ctx *run_ctx)
+       __releases(RCU)
+{
+       bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
+       update_prog_stats(prog, start);
+       migrate_enable();
+       rcu_read_unlock();
+}
+
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
 {
        percpu_ref_get(&tr->pcref);