bpf: implement sleepable uprobes by chaining gps

author Delyan Kratunov <delyank@fb.com>

Tue, 14 Jun 2022 23:10:46 +0000 (23:10 +0000)

committer Alexei Starovoitov <ast@kernel.org>

Fri, 17 Jun 2022 02:27:29 +0000 (19:27 -0700)
author Delyan Kratunov <delyank@fb.com>
Tue, 14 Jun 2022 23:10:46 +0000 (23:10 +0000)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 17 Jun 2022 02:27:29 +0000 (19:27 -0700)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index 69106ae4646461e9a038d0082a26dd8268eac72e..f3e88afdaffeb697cc184517733e366f6ac16d18 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -26,6 +26,7 @@
  #include <linux/stddef.h>
  #include <linux/bpfptr.h>
  #include <linux/btf.h>
+#include <linux/rcupdate_trace.h>
  
  struct bpf_verifier_env;
  struct bpf_verifier_log;
@@ -1372,6 +1373,8 @@ extern struct bpf_empty_prog_array bpf_empty_prog_array;
  
  struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
  void bpf_prog_array_free(struct bpf_prog_array *progs);
+/* Use when traversal over the bpf_prog_array uses tasks_trace rcu */
+void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs);
  int bpf_prog_array_length(struct bpf_prog_array *progs);
  bool bpf_prog_array_is_empty(struct bpf_prog_array *array);
  int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
@@ -1463,6 +1466,55 @@ bpf_prog_run_array(const struct bpf_prog_array *array,
         return ret;
  }
  
+/* Run each program in @array_rcu through @run_prog with @ctx. Returns 1 ANDed
+ * with every program's result (so 1 if the array is NULL or has no programs).
+ *
+ * The tasks_trace rcu read section protects the bpf_prog_array overall, so
+ * the array must be freed via bpf_prog_array_free_sleepable() in order to
+ * wait for a tasks_trace rcu grace period.
+ * When a non-sleepable program is inside the array, we take the rcu read
+ * section and disable preemption for that program alone, so it can access
+ * rcu-protected dynamically sized maps.
+ */
+static __always_inline u32
+bpf_prog_run_array_sleepable(const struct bpf_prog_array __rcu *array_rcu,
+                            const void *ctx, bpf_prog_run_fn run_prog)
+{
+       const struct bpf_prog_array_item *item;
+       const struct bpf_prog *prog;
+       const struct bpf_prog_array *array;
+       struct bpf_run_ctx *old_run_ctx;
+       struct bpf_trace_run_ctx run_ctx;
+       u32 ret = 1; /* returned unchanged when no program runs */
+
+       might_fault();
+
+       rcu_read_lock_trace(); /* held across the whole array walk */
+       migrate_disable();
+
+       array = rcu_dereference_check(array_rcu, rcu_read_lock_trace_held());
+       if (unlikely(!array))
+               goto out; /* run ctx was never set, so skip the reset below */
+       old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+       item = &array->items[0];
+       while ((prog = READ_ONCE(item->prog))) { /* a NULL prog ends the walk */
+               if (!prog->aux->sleepable) /* NOTE(review): assumes prog->aux is non-NULL for every entry, incl. any placeholder left by bpf_prog_array_delete_safe() - confirm */
+                       rcu_read_lock();
+
+               run_ctx.bpf_cookie = item->bpf_cookie; /* per-item cookie, set before each run */
+               ret &= run_prog(prog, ctx);
+               item++;
+
+               if (!prog->aux->sleepable) /* tests the prog that just ran; item already points at the next entry */
+                       rcu_read_unlock();
+       }
+       bpf_reset_run_ctx(old_run_ctx);
+out:
+       migrate_enable(); /* undo in reverse order of acquisition */
+       rcu_read_unlock_trace();
+       return ret;
+}
+
  #ifdef CONFIG_BPF_SYSCALL
  DECLARE_PER_CPU(int, bpf_prog_active);
  extern struct mutex bpf_stats_enabled_mutex;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c

index e78cc5eea4a58012283656ccb38b61203e15d2fc..b5ffebcce6ccae22ad3d252c823b7a61d7bfd206 100644 (file)
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2279,6 +2279,21 @@ void bpf_prog_array_free(struct bpf_prog_array *progs)
         kfree_rcu(progs, rcu);
  }
  
+static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu)
+{ /* callback passed to call_rcu_tasks_trace() by bpf_prog_array_free_sleepable() */
+       struct bpf_prog_array *progs;
+
+       progs = container_of(rcu, struct bpf_prog_array, rcu); /* recover the array from its embedded rcu_head */
+       kfree_rcu(progs, rcu); /* freed via kfree_rcu(), not kfree(): chains a second grace period after the tasks_trace one */
+}
+
+void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs)
+{ /* deferred free of @progs for arrays traversed under tasks_trace rcu */
+       if (!progs || progs == &bpf_empty_prog_array.hdr) /* NULL and the global empty array are never freed */
+               return;
+       call_rcu_tasks_trace(&progs->rcu, __bpf_prog_array_free_sleepable_cb); /* callback runs after a tasks_trace grace period */
+}
+
  int bpf_prog_array_length(struct bpf_prog_array *array)
  {
         struct bpf_prog_array_item *item;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c

index 10b157a6d73e0746d3ae2bf5ccfcd3423384085f..d1c22594dbf9c046ca11520e8a6b6046a1b33356 100644 (file)
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1936,7 +1936,7 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
         event->prog = prog;
         event->bpf_cookie = bpf_cookie;
         rcu_assign_pointer(event->tp_event->prog_array, new_array);
-       bpf_prog_array_free(old_array);
+       bpf_prog_array_free_sleepable(old_array);
  
  unlock:
         mutex_unlock(&bpf_event_mutex);
@@ -1962,7 +1962,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
                 bpf_prog_array_delete_safe(old_array, event->prog);
         } else {
                 rcu_assign_pointer(event->tp_event->prog_array, new_array);
-               bpf_prog_array_free(old_array);
+               bpf_prog_array_free_sleepable(old_array);
         }
  
         bpf_prog_put(event->prog);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c

index 9711589273cd58af223a154097687942a6d5e927..0282c119b1b2389918c27ce0a03c7243e5e6983e 100644 (file)
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -16,6 +16,7 @@
  #include <linux/namei.h>
  #include <linux/string.h>
  #include <linux/rculist.h>
+#include <linux/filter.h>
  
  #include "trace_dynevent.h"
  #include "trace_probe.h"
@@ -1346,9 +1347,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
         if (bpf_prog_array_valid(call)) {
                 u32 ret;
  
-               preempt_disable();
-               ret = trace_call_bpf(call, regs);
-               preempt_enable();
+               ret = bpf_prog_run_array_sleepable(call->prog_array, regs, bpf_prog_run);
                 if (!ret)
                         return;
         }
author	Delyan Kratunov <delyank@fb.com>
	Tue, 14 Jun 2022 23:10:46 +0000 (23:10 +0000)
committer	Alexei Starovoitov <ast@kernel.org>
	Fri, 17 Jun 2022 02:27:29 +0000 (19:27 -0700)
include/linux/bpf.h		patch \| blob \| history
kernel/bpf/core.c		patch \| blob \| history
kernel/trace/bpf_trace.c		patch \| blob \| history
kernel/trace/trace_uprobe.c		patch \| blob \| history