bpf: add a bpf_override_function helper

author Josef Bacik <jbacik@fb.com>

Mon, 11 Dec 2017 16:36:48 +0000 (11:36 -0500)

committer Alexei Starovoitov <ast@kernel.org>

Tue, 12 Dec 2017 17:02:34 +0000 (09:02 -0800)
author Josef Bacik <jbacik@fb.com>
Mon, 11 Dec 2017 16:36:48 +0000 (11:36 -0500)
committer Alexei Starovoitov <ast@kernel.org>
Tue, 12 Dec 2017 17:02:34 +0000 (09:02 -0800)
diff --git a/arch/Kconfig b/arch/Kconfig

index 400b9e1..d3f4aaf 100644 (file)
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -196,6 +196,9 @@ config HAVE_OPTPROBES
  config HAVE_KPROBES_ON_FTRACE
         bool
  
+config HAVE_KPROBE_OVERRIDE
+       bool
+
  config HAVE_NMI
         bool
  
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index 8eed3f9..04d66e6 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -154,6 +154,7 @@ config X86
         select HAVE_KERNEL_XZ
         select HAVE_KPROBES
         select HAVE_KPROBES_ON_FTRACE
+       select HAVE_KPROBE_OVERRIDE
         select HAVE_KRETPROBES
         select HAVE_KVM
         select HAVE_LIVEPATCH                   if X86_64
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h

index 9f2e310..36abb23 100644 (file)
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -67,6 +67,10 @@ extern const int kretprobe_blacklist_size;
  void arch_remove_kprobe(struct kprobe *p);
  asmlinkage void kretprobe_trampoline(void);
  
+#ifdef CONFIG_KPROBES_ON_FTRACE
+extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs);
+#endif
+
  /* Architecture specific copy of original instruction*/
  struct arch_specific_insn {
         /* copy of the original instruction */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h

index 14131dd..6de1fd3 100644 (file)
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -109,6 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
         return regs->ax;
  }
  
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+       regs->ax = rc;
+}
+
  /*
   * user_mode(regs) determines whether a register set came from user
   * mode.  On x86_32, this is true if V8086 mode was enabled OR if the
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c

index 8dc0161..1ea748d 100644 (file)
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p)
         p->ainsn.boostable = false;
         return 0;
  }
+
+asmlinkage void override_func(void);
+asm(
+       ".type override_func, @function\n"
+       "override_func:\n"
+       "       ret\n"
+       ".size override_func, .-override_func\n"
+);
+
+void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
+{
+       regs->ip = (unsigned long)&override_func;
+}
+NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);
diff --git a/include/linux/filter.h b/include/linux/filter.h

index 0062302..5feb441 100644 (file)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -458,7 +458,8 @@ struct bpf_prog {
                                 locked:1,       /* Program image locked? */
                                 gpl_compatible:1, /* Is filter GPL compatible? */
                                 cb_access:1,    /* Is control block accessed? */
-                               dst_needed:1;   /* Do we need dst entry? */
+                               dst_needed:1,   /* Do we need dst entry? */
+                               kprobe_override:1; /* Do we override a kprobe? */
         enum bpf_prog_type      type;           /* Type of BPF program */
         u32                     len;            /* Number of filter blocks */
         u32                     jited_len;      /* Size of jited insns in bytes */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h

index af44e7c..5fea451 100644 (file)
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -528,6 +528,7 @@ do {                                                                        \
  struct perf_event;
  
  DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
+DECLARE_PER_CPU(int, bpf_kprobe_override);
  
  extern int  perf_trace_init(struct perf_event *event);
  extern void perf_trace_destroy(struct perf_event *event);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 80d62e8..595bda1 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -677,6 +677,10 @@ union bpf_attr {
   *     @buf: buf to fill
   *     @buf_size: size of the buf
   *     Return : 0 on success or negative error code
+ *
+ * int bpf_override_return(pt_regs, rc)
+ *     @pt_regs: pointer to struct pt_regs
+ *     @rc: the return value to set
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -736,7 +740,8 @@ union bpf_attr {
         FN(xdp_adjust_meta),            \
         FN(perf_event_read_value),      \
         FN(perf_prog_read_value),       \
-       FN(getsockopt),
+       FN(getsockopt),                 \
+       FN(override_return),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c

index b16c6f8..d32bebf 100644 (file)
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1320,6 +1320,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
  bool bpf_prog_array_compatible(struct bpf_array *array,
                                const struct bpf_prog *fp)
  {
+       if (fp->kprobe_override)
+               return false;
+
         if (!array->owner_prog_type) {
                 /* There's no owner yet where we could check for
                  * compatibility.
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index 7afa92e..e807bda 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4413,6 +4413,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
                         prog->dst_needed = 1;
                 if (insn->imm == BPF_FUNC_get_prandom_u32)
                         bpf_user_rnd_init_once();
+               if (insn->imm == BPF_FUNC_override_return)
+                       prog->kprobe_override = 1;
                 if (insn->imm == BPF_FUNC_tail_call) {
                         /* If we tail call into other programs, we
                          * cannot make any assumptions since they can
diff --git a/kernel/events/core.c b/kernel/events/core.c

index f10609e..5857c50 100644 (file)
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8080,6 +8080,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
                 return -EINVAL;
         }
  
+       /* Kprobe override only works for kprobes, not uprobes. */
+       if (prog->kprobe_override &&
+           !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
+               bpf_prog_put(prog);
+               return -EINVAL;
+       }
+
         if (is_tracepoint || is_syscall_tp) {
                 int off = trace_event_get_offsets(event->tp_event);
  
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig

index af7dad1..3e6fd58 100644 (file)
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -529,6 +529,17 @@ config FUNCTION_PROFILER
  
           If in doubt, say N.
  
+config BPF_KPROBE_OVERRIDE
+       bool "Enable BPF programs to override a kprobed function"
+       depends on BPF_EVENTS
+       depends on KPROBES_ON_FTRACE
+       depends on HAVE_KPROBE_OVERRIDE
+       depends on DYNAMIC_FTRACE_WITH_REGS
+       default n
+       help
+        Allows BPF to override the execution of a probed function and
+        set a different return value.  This is used for error injection.
+
  config FTRACE_MCOUNT_RECORD
         def_bool y
         depends on DYNAMIC_FTRACE
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c

index b143f2a..e009b7e 100644 (file)
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -13,6 +13,10 @@
  #include <linux/filter.h>
  #include <linux/uaccess.h>
  #include <linux/ctype.h>
+#include <linux/kprobes.h>
+#include <asm/kprobes.h>
+
+#include "trace_probe.h"
  #include "trace.h"
  
  u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
@@ -76,6 +80,24 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
  }
  EXPORT_SYMBOL_GPL(trace_call_bpf);
  
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
+{
+       __this_cpu_write(bpf_kprobe_override, 1);
+       regs_set_return_value(regs, rc);
+       arch_ftrace_kprobe_override_function(regs);
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_override_return_proto = {
+       .func           = bpf_override_return,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+};
+#endif
+
  BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
  {
         int ret;
@@ -551,6 +573,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
                 return &bpf_get_stackid_proto;
         case BPF_FUNC_perf_event_read_value:
                 return &bpf_perf_event_read_value_proto;
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+       case BPF_FUNC_override_return:
+               return &bpf_override_return_proto;
+#endif
         default:
                 return tracing_func_proto(func_id);
         }
@@ -768,6 +794,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
         struct bpf_prog_array *new_array;
         int ret = -EEXIST;
  
+       /*
+        * Kprobe override only works for ftrace based kprobes, and only if they
+        * are on the opt-in list.
+        */
+       if (prog->kprobe_override &&
+           (!trace_kprobe_ftrace(event->tp_event) ||
+            !trace_kprobe_error_injectable(event->tp_event)))
+               return -EINVAL;
+
         mutex_lock(&bpf_event_mutex);
  
         if (event->prog)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c

index 492700c..5db8498 100644 (file)
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -42,6 +42,7 @@ struct trace_kprobe {
         (offsetof(struct trace_kprobe, tp.args) +       \
         (sizeof(struct probe_arg) * (n)))
  
+DEFINE_PER_CPU(int, bpf_kprobe_override);
  
  static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
  {
@@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
         return nhit;
  }
  
+int trace_kprobe_ftrace(struct trace_event_call *call)
+{
+       struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+       return kprobe_ftrace(&tk->rp.kp);
+}
+
+int trace_kprobe_error_injectable(struct trace_event_call *call)
+{
+       struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+       unsigned long addr;
+
+       if (tk->symbol) {
+               addr = (unsigned long)
+                       kallsyms_lookup_name(trace_kprobe_symbol(tk));
+               addr += tk->rp.kp.offset;
+       } else {
+               addr = (unsigned long)tk->rp.kp.addr;
+       }
+       return within_kprobe_error_injection_list(addr);
+}
+
  static int register_kprobe_event(struct trace_kprobe *tk);
  static int unregister_kprobe_event(struct trace_kprobe *tk);
  
@@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
  #ifdef CONFIG_PERF_EVENTS
  
  /* Kprobe profile handler */
-static void
+static int
  kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
  {
         struct trace_event_call *call = &tk->tp.call;
@@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
         int size, __size, dsize;
         int rctx;
  
-       if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
-               return;
+       if (bpf_prog_array_valid(call)) {
+               int ret;
+
+               ret = trace_call_bpf(call, regs);
+
+               /*
+                * We need to check and see if we modified the pc of the
+                * pt_regs, and if so clear the kprobe and return 1 so that we
+                * don't do the instruction skipping.  Also reset our state so
+                * we are clean the next pass through.
+                */
+               if (__this_cpu_read(bpf_kprobe_override)) {
+                       __this_cpu_write(bpf_kprobe_override, 0);
+                       reset_current_kprobe();
+                       return 1;
+               }
+               if (!ret)
+                       return 0;
+       }
  
         head = this_cpu_ptr(call->perf_events);
         if (hlist_empty(head))
-               return;
+               return 0;
  
         dsize = __get_data_size(&tk->tp, regs);
         __size = sizeof(*entry) + tk->tp.size + dsize;
@@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
  
         entry = perf_trace_buf_alloc(size, NULL, &rctx);
         if (!entry)
-               return;
+               return 0;
  
         entry->ip = (unsigned long)tk->rp.kp.addr;
         memset(&entry[1], 0, dsize);
         store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
         perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
                               head, NULL);
+       return 0;
  }
  NOKPROBE_SYMBOL(kprobe_perf_func);
  
@@ -1275,6 +1315,7 @@ static int kprobe_register(struct trace_event_call *event,
  static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
  {
         struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
+       int ret = 0;
  
         raw_cpu_inc(*tk->nhit);
  
@@ -1282,9 +1323,9 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
                 kprobe_trace_func(tk, regs);
  #ifdef CONFIG_PERF_EVENTS
         if (tk->tp.flags & TP_FLAG_PROFILE)
-               kprobe_perf_func(tk, regs);
+               ret = kprobe_perf_func(tk, regs);
  #endif
-       return 0;       /* We don't tweek kernel, so just return 0 */
+       return ret;
  }
  NOKPROBE_SYMBOL(kprobe_dispatcher);
  
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h

index fb66e3e..5e54d74 100644 (file)
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -252,6 +252,8 @@ struct symbol_cache;
  unsigned long update_symbol_cache(struct symbol_cache *sc);
  void free_symbol_cache(struct symbol_cache *sc);
  struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
+int trace_kprobe_ftrace(struct trace_event_call *call);
+int trace_kprobe_error_injectable(struct trace_event_call *call);
  #else
  /* uprobes do not support symbol fetch methods */
  #define fetch_symbol_u8                        NULL
@@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset)
  {
         return NULL;
  }
+
+static inline int trace_kprobe_ftrace(struct trace_event_call *call)
+{
+       return 0;
+}
+
+static inline int trace_kprobe_error_injectable(struct trace_event_call *call)
+{
+       return 0;
+}
  #endif /* CONFIG_KPROBE_EVENTS */
  
  struct probe_arg {
author	Josef Bacik <jbacik@fb.com>
	Mon, 11 Dec 2017 16:36:48 +0000 (11:36 -0500)
committer	Alexei Starovoitov <ast@kernel.org>
	Tue, 12 Dec 2017 17:02:34 +0000 (09:02 -0800)
arch/Kconfig		patch \| blob \| history
arch/x86/Kconfig		patch \| blob \| history
arch/x86/include/asm/kprobes.h		patch \| blob \| history
arch/x86/include/asm/ptrace.h		patch \| blob \| history
arch/x86/kernel/kprobes/ftrace.c		patch \| blob \| history
include/linux/filter.h		patch \| blob \| history
include/linux/trace_events.h		patch \| blob \| history
include/uapi/linux/bpf.h		patch \| blob \| history
kernel/bpf/core.c		patch \| blob \| history
kernel/bpf/verifier.c		patch \| blob \| history
kernel/events/core.c		patch \| blob \| history
kernel/trace/Kconfig		patch \| blob \| history
kernel/trace/bpf_trace.c		patch \| blob \| history
kernel/trace/trace_kprobe.c		patch \| blob \| history
kernel/trace/trace_probe.h		patch \| blob \| history