bpf: Add bpf_xdp_output() helper
authorEelco Chaudron <echaudro@redhat.com>
Fri, 6 Mar 2020 08:59:23 +0000 (08:59 +0000)
committerAlexei Starovoitov <ast@kernel.org>
Fri, 13 Mar 2020 00:47:38 +0000 (17:47 -0700)
Introduce new helper that reuses existing xdp perf_event output
implementation, but can be called from raw_tracepoint programs
that receive 'struct xdp_buff *' as a tracepoint argument.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/158348514556.2239.11050972434793741444.stgit@xdp-tutorial
include/uapi/linux/bpf.h
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/core/filter.c
tools/include/uapi/linux/bpf.h
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c

index 15b239da775bbc9a3699d5ebb4aa1c7e49ea29a4..5d01c5c7e598612ed316204e7b95f218b90f1134 100644 (file)
@@ -2927,6 +2927,29 @@ union bpf_attr {
  *
  *             **-ENOENT** if pidns does not exist for the current task.
  *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *     Description
+ *             Write raw *data* blob into a special BPF perf event held by
+ *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *             event must have the following attributes: **PERF_SAMPLE_RAW**
+ *             as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *             **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *             The *flags* are used to indicate the index in *map* for which
+ *             the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *             Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *             to indicate that the index of the current CPU core should be
+ *             used.
+ *
+ *             The value to write, of *size*, is passed through eBPF stack and
+ *             pointed by *data*.
+ *
+ *             *ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ *             This helper is similar to **bpf_perf_event_output**\ () but
+ *             restricted to raw_tracepoint bpf programs.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -3049,7 +3072,8 @@ union bpf_attr {
        FN(send_signal_thread),         \
        FN(jiffies64),                  \
        FN(read_branch_records),        \
-       FN(get_ns_current_pid_tgid),
+       FN(get_ns_current_pid_tgid),    \
+       FN(xdp_output),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
index 55d376c53f7d3b4eb8e0be580a357f86697f1431..745f3cfdf3b2a4b1aaa5f47a1a1ebb137c2693c7 100644 (file)
@@ -3650,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
                if (func_id != BPF_FUNC_perf_event_read &&
                    func_id != BPF_FUNC_perf_event_output &&
                    func_id != BPF_FUNC_skb_output &&
-                   func_id != BPF_FUNC_perf_event_read_value)
+                   func_id != BPF_FUNC_perf_event_read_value &&
+                   func_id != BPF_FUNC_xdp_output)
                        goto error;
                break;
        case BPF_MAP_TYPE_STACK_TRACE:
@@ -3740,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
        case BPF_FUNC_perf_event_output:
        case BPF_FUNC_perf_event_read_value:
        case BPF_FUNC_skb_output:
+       case BPF_FUNC_xdp_output:
                if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
                        goto error;
                break;
index b5071c7e93ca9604309d8495f5787d930bda26dd..e619eedb591915997d93393a58d30d1aa2914952 100644 (file)
@@ -1145,6 +1145,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
 };
 
 extern const struct bpf_func_proto bpf_skb_output_proto;
+extern const struct bpf_func_proto bpf_xdp_output_proto;
 
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
           struct bpf_map *, map, u64, flags)
@@ -1220,6 +1221,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #ifdef CONFIG_NET
        case BPF_FUNC_skb_output:
                return &bpf_skb_output_proto;
+       case BPF_FUNC_xdp_output:
+               return &bpf_xdp_output_proto;
 #endif
        default:
                return raw_tp_prog_func_proto(func_id, prog);
index cd0a532db4e7189637f8aac07cf165ba7351eac3..22219544410fa5f17567c18a3bcf5dc9a4f72314 100644 (file)
@@ -4061,7 +4061,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
 
        if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
                return -EINVAL;
-       if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+       if (unlikely(!xdp ||
+                    xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
                return -EFAULT;
 
        return bpf_event_output(map, flags, meta, meta_size, xdp->data,
@@ -4079,6 +4080,19 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 };
 
+/* BTF-typed proto for the bpf_xdp_output() helper: same implementation as
+ * bpf_xdp_event_output(), but arg1 is ARG_PTR_TO_BTF_ID so tracing
+ * programs can pass a raw in-kernel 'struct xdp_buff *' instead of a
+ * program ctx pointer.
+ */
+/* NOTE(review): one id slot per helper argument; the ids are resolved
+ * outside this hunk -- presumably only arg1's slot is consumed (confirm).
+ */
+static int bpf_xdp_output_btf_ids[5];
+const struct bpf_func_proto bpf_xdp_output_proto = {
+       .func           = bpf_xdp_event_output,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID,
+       .arg2_type      = ARG_CONST_MAP_PTR,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_MEM,
+       .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
+       .btf_id         = bpf_xdp_output_btf_ids,
+};
+
 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
 {
        return skb->sk ? sock_gen_cookie(skb->sk) : 0;
index 15b239da775bbc9a3699d5ebb4aa1c7e49ea29a4..5d01c5c7e598612ed316204e7b95f218b90f1134 100644 (file)
@@ -2927,6 +2927,29 @@ union bpf_attr {
  *
  *             **-ENOENT** if pidns does not exist for the current task.
  *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *     Description
+ *             Write raw *data* blob into a special BPF perf event held by
+ *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *             event must have the following attributes: **PERF_SAMPLE_RAW**
+ *             as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *             **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *             The *flags* are used to indicate the index in *map* for which
+ *             the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *             Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *             to indicate that the index of the current CPU core should be
+ *             used.
+ *
+ *             The value to write, of *size*, is passed through eBPF stack and
+ *             pointed by *data*.
+ *
+ *             *ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ *             This helper is similar to **bpf_perf_event_output**\ () but
+ *             restricted to raw_tracepoint bpf programs.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -3049,7 +3072,8 @@ union bpf_attr {
        FN(send_signal_thread),         \
        FN(jiffies64),                  \
        FN(read_branch_records),        \
-       FN(get_ns_current_pid_tgid),
+       FN(get_ns_current_pid_tgid),    \
+       FN(xdp_output),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
index 4ba011031d4c3945fe1d4aeb955f8ec7fe075e0b..a0f688c370235d978f4d0c1a06aed3f46c04bb4c 100644 (file)
@@ -4,17 +4,51 @@
 #include "test_xdp.skel.h"
 #include "test_xdp_bpf2bpf.skel.h"
 
+/* Metadata header the BPF program prepends to each perf sample; must stay
+ * in sync with 'struct meta' in progs/test_xdp_bpf2bpf.c.
+ */
+struct meta {
+       int ifindex;
+       int pkt_len;
+};
+
+/* Perf-buffer sample callback: validate one sample (a 'struct meta' header
+ * followed by the raw IPv4 test packet) and, if everything matches, set
+ * the bool that *ctx points at (the caller's 'passed' flag).
+ */
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+       /* NOTE(review): presumably referenced inside the CHECK() macro's
+        * expansion rather than directly here -- confirm.
+        */
+       int duration = 0;
+       struct meta *meta = (struct meta *)data;
+       struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+
+       /* Sample must hold the meta header plus a full test packet. */
+       if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
+                 "check_size", "size %u < %zu\n",
+                 size, sizeof(pkt_v4) + sizeof(*meta)))
+               return;
+
+       if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
+                 "meta->ifindex = %d\n", meta->ifindex))
+               return;
+
+       /* NOTE(review): on mismatch this prints sizeof(pkt_v4) -- the
+        * expected value -- not the observed meta->pkt_len, and %zd is the
+        * signed conversion for a size_t (%zu would match). Message-only
+        * defect; the comparison itself is correct.
+        */
+       if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
+                 "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+               return;
+
+       /* Payload after the header must be the packet that was transmitted. */
+       if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
+                 "check_packet_content", "content not the same\n"))
+               return;
+
+       /* All checks passed: flip the caller's 'passed' flag. */
+       *(bool *)ctx = true;
+}
+
 void test_xdp_bpf2bpf(void)
 {
        __u32 duration = 0, retval, size;
        char buf[128];
        int err, pkt_fd, map_fd;
+       bool passed = false;
        struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
        struct iptnl_info value4 = {.family = AF_INET};
        struct test_xdp *pkt_skel = NULL;
        struct test_xdp_bpf2bpf *ftrace_skel = NULL;
        struct vip key4 = {.protocol = 6, .family = AF_INET};
        struct bpf_program *prog;
+       struct perf_buffer *pb = NULL;
+       struct perf_buffer_opts pb_opts = {};
 
        /* Load XDP program to introspect */
        pkt_skel = test_xdp__open_and_load();
@@ -50,6 +84,14 @@ void test_xdp_bpf2bpf(void)
        if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
                goto out;
 
+       /* Set up perf buffer */
+       pb_opts.sample_cb = on_sample;
+       pb_opts.ctx = &passed;
+       pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
+                             1, &pb_opts);
+       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+               goto out;
+
        /* Run test program */
        err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                buf, &size, &retval, &duration);
@@ -60,6 +102,15 @@ void test_xdp_bpf2bpf(void)
                  err, errno, retval, size))
                goto out;
 
+       /* Make sure bpf_xdp_output() was triggered and it sent the expected
+        * data to the perf ring buffer.
+        */
+       err = perf_buffer__poll(pb, 100);
+       if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+               goto out;
+
+       CHECK_FAIL(!passed);
+
        /* Verify test results */
        if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
                  "result", "fentry failed err %llu\n",
@@ -70,6 +121,8 @@ void test_xdp_bpf2bpf(void)
              "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
 
 out:
+       if (pb)
+               perf_buffer__free(pb);
        test_xdp__destroy(pkt_skel);
        test_xdp_bpf2bpf__destroy(ftrace_skel);
 }
index 42dd2fedd588a18ab0f8f6042ecb1b70a98b68fc..a038e827f850a077cf8ab616c6496c70fe829c37 100644 (file)
@@ -3,6 +3,8 @@
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
 
+char _license[] SEC("license") = "GPL";
+
 struct net_device {
        /* Structure does not need to contain all entries,
         * as "preserve_access_index" will use BTF to fix this...
@@ -27,10 +29,32 @@ struct xdp_buff {
        struct xdp_rxq_info *rxq;
 } __attribute__((preserve_access_index));
 
+struct meta {
+       int ifindex;
+       int pkt_len;
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
 __u64 test_result_fentry = 0;
 SEC("fentry/FUNC")
 int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
 {
+       struct meta meta;
+       void *data_end = (void *)(long)xdp->data_end;
+       void *data = (void *)(long)xdp->data;
+
+       /* Stash ifindex + packet length in the sample's metadata header. */
+       meta.ifindex = xdp->rxq->dev->ifindex;
+       meta.pkt_len = data_end - data;
+       /* The upper 32 bits of 'flags' carry the number of payload bytes
+        * to append from xdp->data after the meta blob (the helper accepts
+        * BPF_F_CTXLEN_MASK bits -- see bpf_xdp_event_output); the low
+        * bits select the ring index, here BPF_F_CURRENT_CPU.
+        * NOTE(review): the helper's return value is ignored -- presumably
+        * acceptable for a test probe, but worth confirming.
+        */
+       bpf_xdp_output(xdp, &perf_buf_map,
+                      ((__u64) meta.pkt_len << 32) |
+                      BPF_F_CURRENT_CPU,
+                      &meta, sizeof(meta));
+
        test_result_fentry = xdp->rxq->dev->ifindex;
        return 0;
 }