bpf: add bpf_get_stack helper

author Yonghong Song <yhs@fb.com>

Sun, 29 Apr 2018 05:28:08 +0000 (22:28 -0700)

committer Alexei Starovoitov <ast@kernel.org>

Sun, 29 Apr 2018 15:45:53 +0000 (08:45 -0700)
author Yonghong Song <yhs@fb.com>
Sun, 29 Apr 2018 05:28:08 +0000 (22:28 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Sun, 29 Apr 2018 15:45:53 +0000 (08:45 -0700)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index 38ebbc6..c553f6f 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -692,6 +692,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto;
  extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
  extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
  extern const struct bpf_func_proto bpf_get_stackid_proto;
+extern const struct bpf_func_proto bpf_get_stack_proto;
  extern const struct bpf_func_proto bpf_sock_map_update_proto;
  
  /* Shared helpers among cBPF and eBPF. */
diff --git a/include/linux/filter.h b/include/linux/filter.h

index 4da8b23..64899c0 100644 (file)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -468,7 +468,8 @@ struct bpf_prog {
                                 dst_needed:1,   /* Do we need dst entry? */
                                 blinded:1,      /* Was blinded */
                                 is_func:1,      /* program is a bpf function */
-                               kprobe_override:1; /* Do we override a kprobe? */
+                               kprobe_override:1, /* Do we override a kprobe? */
+                               has_callchain_buf:1; /* callchain buffer allocated? */
         enum bpf_prog_type      type;           /* Type of BPF program */
         enum bpf_attach_type    expected_attach_type; /* For some prog types */
         u32                     len;            /* Number of filter blocks */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index da77a93..1afb606 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1767,6 +1767,40 @@ union bpf_attr {
   *             **CONFIG_XFRM** configuration option.
   *     Return
   *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *ctx, void *buf, u32 size, u64 flags)
+ *     Description
+ *             Return a user or a kernel stack in bpf program provided buffer.
+ *             To achieve this, the helper needs *ctx*, which is a pointer
+ *             to the context on which the tracing program is executed.
+ *             To store the stacktrace, the bpf program provides *buf* with
+ *             a nonnegative *size*.
+ *
+ *             The last argument, *flags*, holds the number of stack frames to
+ *             skip (from 0 to 255), masked with
+ *             **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ *             the following flags:
+ *
+ *             **BPF_F_USER_STACK**
+ *                     Collect a user space stack instead of a kernel stack.
+ *             **BPF_F_USER_BUILD_ID**
+ *                     Collect buildid+offset instead of ips for user stack,
+ *                     only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ *             **bpf_get_stack**\ () can collect up to
+ *             **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ *             to a sufficiently large buffer size. Note that
+ *             this limit can be controlled with the **sysctl** program, and
+ *             that it should be manually increased in order to profile long
+ *             user stacks (such as stacks for Java programs). To do so, use:
+ *
+ *     ::
+ *
+ *             # sysctl kernel.perf_event_max_stack=<new value>
+ *
+ *     Return
+ *             A non-negative value equal to or less than *size* on success,
+ *             or a negative error in case of failure.
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -1835,7 +1869,8 @@ union bpf_attr {
         FN(msg_pull_data),              \
         FN(bind),                       \
         FN(xdp_adjust_tail),            \
-       FN(skb_get_xfrm_state),
+       FN(skb_get_xfrm_state),         \
+       FN(get_stack),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@ -1869,11 +1904,14 @@ enum bpf_func_id {
  /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
  #define BPF_F_TUNINFO_IPV6             (1ULL << 0)
  
-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
  #define BPF_F_SKIP_FIELD_MASK          0xffULL
  #define BPF_F_USER_STACK               (1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
  #define BPF_F_FAST_STACK_CMP           (1ULL << 9)
  #define BPF_F_REUSE_STACKID            (1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID            (1ULL << 11)
  
  /* BPF_FUNC_skb_set_tunnel_key flags. */
  #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c

index ba03ec3..9349a5d 100644 (file)
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -31,6 +31,7 @@
  #include <linux/rbtree_latch.h>
  #include <linux/kallsyms.h>
  #include <linux/rcupdate.h>
+#include <linux/perf_event.h>
  
  #include <asm/unaligned.h>
  
@@ -1722,6 +1723,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
         aux = container_of(work, struct bpf_prog_aux, work);
         if (bpf_prog_is_dev_bound(aux))
                 bpf_prog_offload_destroy(aux->prog);
+#ifdef CONFIG_PERF_EVENTS
+       if (aux->prog->has_callchain_buf)
+               put_callchain_buffers();
+#endif
         for (i = 0; i < aux->func_cnt; i++)
                 bpf_jit_free(aux->func[i]);
         if (aux->func_cnt) {
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c

index 04f6ec1..3ba102b 100644 (file)
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -402,6 +402,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
         .arg3_type      = ARG_ANYTHING,
  };
  
+BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
+          u64, flags) /* copy a kernel or user callchain into buf; returns bytes copied or -errno */
+{
+       u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
+       bool user_build_id = flags & BPF_F_USER_BUILD_ID;
+       u32 skip = flags & BPF_F_SKIP_FIELD_MASK; /* leading entries to skip */
+       bool user = flags & BPF_F_USER_STACK;
+       struct perf_callchain_entry *trace;
+       bool kernel = !user; /* exactly one of kernel/user is collected */
+       int err = -EINVAL; /* default for the argument checks below */
+       u64 *ips;
+
+       if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
+                              BPF_F_USER_BUILD_ID)))
+               goto clear; /* unknown flag bits */
+       if (kernel && user_build_id)
+               goto clear; /* build ids are only accepted for user stacks */
+
+       elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
+                                           : sizeof(u64);
+       if (unlikely(size % elem_size))
+               goto clear; /* size must be a whole number of elements */
+
+       num_elem = size / elem_size;
+       if (sysctl_perf_event_max_stack < num_elem)
+               init_nr = 0;
+       else
+               init_nr = sysctl_perf_event_max_stack - num_elem; /* NOTE(review): presumably caps recorded entries at num_elem - confirm in get_perf_callchain() */
+       trace = get_perf_callchain(regs, init_nr, kernel, user,
+                                  sysctl_perf_event_max_stack, false, false);
+       if (unlikely(!trace))
+               goto err_fault;
+
+       trace_nr = trace->nr - init_nr; /* entries counted past the init_nr offset */
+       if (trace_nr < skip)
+               goto err_fault; /* fewer entries than the requested skip */
+
+       trace_nr -= skip;
+       trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; /* clamp to buffer capacity */
+       copy_len = trace_nr * elem_size;
+       ips = trace->ip + skip + init_nr; /* first entry handed to the caller */
+       if (user && user_build_id)
+               stack_map_get_build_id_offset(buf, ips, trace_nr, user);
+       else
+               memcpy(buf, ips, copy_len);
+
+       if (size > copy_len)
+               memset(buf + copy_len, 0, size - copy_len); /* zero the unused tail of buf */
+       return copy_len;
+
+err_fault:
+       err = -EFAULT;
+clear:
+       memset(buf, 0, size); /* on failure the whole buffer is zeroed */
+       return err;
+}
+
+const struct bpf_func_proto bpf_get_stack_proto = { /* helper prototype for bpf_get_stack */
+       .func           = bpf_get_stack,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER, /* bytes copied or negative error */
+       .arg1_type      = ARG_PTR_TO_CTX, /* regs */
+       .arg2_type      = ARG_PTR_TO_UNINIT_MEM, /* buf, written by the helper */
+       .arg3_type      = ARG_CONST_SIZE_OR_ZERO, /* size of buf in bytes */
+       .arg4_type      = ARG_ANYTHING, /* flags */
+};
+
  /* Called from eBPF program */
  static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
  {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index eb1a596..253f6bd 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -22,6 +22,7 @@
  #include <linux/stringify.h>
  #include <linux/bsearch.h>
  #include <linux/sort.h>
+#include <linux/perf_event.h>
  
  #include "disasm.h"
  
@@ -2450,6 +2451,24 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
         if (err)
                 return err;
  
+       if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
+               const char *err_str;
+
+#ifdef CONFIG_PERF_EVENTS
+               err = get_callchain_buffers(sysctl_perf_event_max_stack);
+               err_str = "cannot get callchain buffer for func %s#%d\n";
+#else
+               err = -ENOTSUPP;
+               err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
+#endif
+               if (err) {
+                       verbose(env, err_str, func_id_name(func_id), func_id);
+                       return err;
+               }
+
+               env->prog->has_callchain_buf = true;
+       }
+
         if (changes_data)
                 clear_all_pkt_pointers(env);
         return 0;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c

index 56ba0f2..46d866e 100644 (file)
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -20,6 +20,7 @@
  #include "trace.h"
  
  u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
  
  /**
   * trace_call_bpf - invoke BPF program
@@ -577,6 +578,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_perf_event_output_proto;
         case BPF_FUNC_get_stackid:
                 return &bpf_get_stackid_proto;
+       case BPF_FUNC_get_stack:
+               return &bpf_get_stack_proto;
         case BPF_FUNC_perf_event_read_value:
                 return &bpf_perf_event_read_value_proto;
  #ifdef CONFIG_BPF_KPROBE_OVERRIDE
@@ -664,6 +667,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
         .arg3_type      = ARG_ANYTHING,
  };
  
+BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
+          u64, flags) /* bpf_get_stack wrapper returned for the tp and pe prog types */
+{
+       struct pt_regs *regs = *(struct pt_regs **)tp_buff; /* first word of tp_buff is read as a pt_regs pointer */
+
+       return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+                            (unsigned long) size, flags, 0); /* fifth register argument is unused */
+}
+
+static const struct bpf_func_proto bpf_get_stack_proto_tp = { /* helper prototype for bpf_get_stack_tp */
+       .func           = bpf_get_stack_tp,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX, /* tp_buff */
+       .arg2_type      = ARG_PTR_TO_UNINIT_MEM, /* buf, written by the helper */
+       .arg3_type      = ARG_CONST_SIZE_OR_ZERO, /* size of buf in bytes */
+       .arg4_type      = ARG_ANYTHING, /* flags */
+};
+
  static const struct bpf_func_proto *
  tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
  {
@@ -672,6 +694,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_perf_event_output_proto_tp;
         case BPF_FUNC_get_stackid:
                 return &bpf_get_stackid_proto_tp;
+       case BPF_FUNC_get_stack:
+               return &bpf_get_stack_proto_tp;
         default:
                 return tracing_func_proto(func_id, prog);
         }
@@ -734,6 +758,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_perf_event_output_proto_tp;
         case BPF_FUNC_get_stackid:
                 return &bpf_get_stackid_proto_tp;
+       case BPF_FUNC_get_stack:
+               return &bpf_get_stack_proto_tp;
         case BPF_FUNC_perf_prog_read_value:
                 return &bpf_perf_prog_read_value_proto;
         default:
@@ -744,7 +770,7 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
  /*
   * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
   * to avoid potential recursive reuse issue when/if tracepoints are added
- * inside bpf_*_event_output and/or bpf_get_stack_id
+ * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
   */
  static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
  BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
@@ -787,6 +813,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
         .arg3_type      = ARG_ANYTHING,
  };
  
+BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
+          void *, buf, u32, size, u64, flags) /* bpf_get_stack wrapper for raw tracepoint programs */
+{
+       struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); /* per-CPU scratch pt_regs */
+
+       perf_fetch_caller_regs(regs); /* fills regs here; args itself is not read */
+       return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
+                            (unsigned long) size, flags, 0); /* fifth register argument is unused */
+}
+
+static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { /* helper prototype for bpf_get_stack_raw_tp */
+       .func           = bpf_get_stack_raw_tp,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX, /* args */
+       .arg2_type      = ARG_PTR_TO_UNINIT_MEM, /* buf is output only: the helper writes or zeroes all of it */
+       .arg3_type      = ARG_CONST_SIZE_OR_ZERO, /* size of buf in bytes */
+       .arg4_type      = ARG_ANYTHING, /* flags */
+};
+
  static const struct bpf_func_proto *
  raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
  {
@@ -795,6 +841,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                 return &bpf_perf_event_output_proto_raw_tp;
         case BPF_FUNC_get_stackid:
                 return &bpf_get_stackid_proto_raw_tp;
+       case BPF_FUNC_get_stack:
+               return &bpf_get_stack_proto_raw_tp;
         default:
                 return tracing_func_proto(func_id, prog);
         }
author	Yonghong Song <yhs@fb.com>
	Sun, 29 Apr 2018 05:28:08 +0000 (22:28 -0700)
committer	Alexei Starovoitov <ast@kernel.org>
	Sun, 29 Apr 2018 15:45:53 +0000 (08:45 -0700)
include/linux/bpf.h		patch \| blob \| history
include/linux/filter.h		patch \| blob \| history
include/uapi/linux/bpf.h		patch \| blob \| history
kernel/bpf/core.c		patch \| blob \| history
kernel/bpf/stackmap.c		patch \| blob \| history
kernel/bpf/verifier.c		patch \| blob \| history
kernel/trace/bpf_trace.c		patch \| blob \| history