bpf: Add bpf_xdp_output() helper
authorEelco Chaudron <echaudro@redhat.com>
Fri, 6 Mar 2020 08:59:23 +0000 (08:59 +0000)
committerAlexei Starovoitov <ast@kernel.org>
Fri, 13 Mar 2020 00:47:38 +0000 (17:47 -0700)
Introduce new helper that reuses existing xdp perf_event output
implementation, but can be called from raw_tracepoint programs
that receive 'struct xdp_buff *' as a tracepoint argument.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/158348514556.2239.11050972434793741444.stgit@xdp-tutorial
include/uapi/linux/bpf.h
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/core/filter.c
tools/include/uapi/linux/bpf.h
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c

index 15b239da775bbc9a3699d5ebb4aa1c7e49ea29a4..5d01c5c7e598612ed316204e7b95f218b90f1134 100644 (file)
@@ -2927,6 +2927,29 @@ union bpf_attr {
  *
  *             **-ENOENT** if pidns does not exist for the current task.
  *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *     Description
+ *             Write raw *data* blob into a special BPF perf event held by
+ *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *             event must have the following attributes: **PERF_SAMPLE_RAW**
+ *             as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *             **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *             The *flags* are used to indicate the index in *map* for which
+ *             the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *             Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *             to indicate that the index of the current CPU core should be
+ *             used.
+ *
+ *             The value to write, of *size*, is passed through eBPF stack and
+ *             pointed by *data*.
+ *
+ *             *ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ *             This helper is similar to **bpf_perf_event_output**\ () but
+ *             restricted to raw_tracepoint bpf programs.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -3049,7 +3072,8 @@ union bpf_attr {
        FN(send_signal_thread),         \
        FN(jiffies64),                  \
        FN(read_branch_records),        \
-       FN(get_ns_current_pid_tgid),
+       FN(get_ns_current_pid_tgid),    \
+       FN(xdp_output),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
index 55d376c53f7d3b4eb8e0be580a357f86697f1431..745f3cfdf3b2a4b1aaa5f47a1a1ebb137c2693c7 100644 (file)
@@ -3650,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
                if (func_id != BPF_FUNC_perf_event_read &&
                    func_id != BPF_FUNC_perf_event_output &&
                    func_id != BPF_FUNC_skb_output &&
-                   func_id != BPF_FUNC_perf_event_read_value)
+                   func_id != BPF_FUNC_perf_event_read_value &&
+                   func_id != BPF_FUNC_xdp_output)
                        goto error;
                break;
        case BPF_MAP_TYPE_STACK_TRACE:
@@ -3740,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
        case BPF_FUNC_perf_event_output:
        case BPF_FUNC_perf_event_read_value:
        case BPF_FUNC_skb_output:
+       case BPF_FUNC_xdp_output:
                if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
                        goto error;
                break;
index b5071c7e93ca9604309d8495f5787d930bda26dd..e619eedb591915997d93393a58d30d1aa2914952 100644 (file)
@@ -1145,6 +1145,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
 };
 
 extern const struct bpf_func_proto bpf_skb_output_proto;
+extern const struct bpf_func_proto bpf_xdp_output_proto;
 
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
           struct bpf_map *, map, u64, flags)
@@ -1220,6 +1221,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #ifdef CONFIG_NET
        case BPF_FUNC_skb_output:
                return &bpf_skb_output_proto;
+       case BPF_FUNC_xdp_output:
+               return &bpf_xdp_output_proto;
 #endif
        default:
                return raw_tp_prog_func_proto(func_id, prog);
index cd0a532db4e7189637f8aac07cf165ba7351eac3..22219544410fa5f17567c18a3bcf5dc9a4f72314 100644 (file)
@@ -4061,7 +4061,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
 
        if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
                return -EINVAL;
-       if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+       if (unlikely(!xdp ||
+                    xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
                return -EFAULT;
 
        return bpf_event_output(map, flags, meta, meta_size, xdp->data,
@@ -4079,6 +4080,19 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = {
        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 };
 
+/* BTF-typed proto for the bpf_xdp_output() helper: same implementation as
+ * bpf_xdp_event_output(), but arg1 is ARG_PTR_TO_BTF_ID so tracing
+ * programs can pass a raw in-kernel 'struct xdp_buff *' instead of a
+ * program ctx pointer.
+ */
+/* NOTE(review): one id slot per helper argument; the ids are resolved
+ * outside this hunk -- presumably only arg1's slot is consumed (confirm).
+ */
+static int bpf_xdp_output_btf_ids[5];
+const struct bpf_func_proto bpf_xdp_output_proto = {
+       .func           = bpf_xdp_event_output,
+       .gpl_only       = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID,
+       .arg2_type      = ARG_CONST_MAP_PTR,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_MEM,
+       .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
+       .btf_id         = bpf_xdp_output_btf_ids,
+};
+
 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
 {
        return skb->sk ? sock_gen_cookie(skb->sk) : 0;
index 15b239da775bbc9a3699d5ebb4aa1c7e49ea29a4..5d01c5c7e598612ed316204e7b95f218b90f1134 100644 (file)
@@ -2927,6 +2927,29 @@ union bpf_attr {
  *
  *             **-ENOENT** if pidns does not exist for the current task.
  *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *     Description
+ *             Write raw *data* blob into a special BPF perf event held by
+ *             *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *             event must have the following attributes: **PERF_SAMPLE_RAW**
+ *             as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *             **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *             The *flags* are used to indicate the index in *map* for which
+ *             the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *             Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *             to indicate that the index of the current CPU core should be
+ *             used.
+ *
+ *             The value to write, of *size*, is passed through eBPF stack and
+ *             pointed by *data*.
+ *
+ *             *ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ *             This helper is similar to **bpf_perf_event_output**\ () but
+ *             restricted to raw_tracepoint bpf programs.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
        FN(unspec),                     \
@@ -3049,7 +3072,8 @@ union bpf_attr {
        FN(send_signal_thread),         \
        FN(jiffies64),                  \
        FN(read_branch_records),        \
-       FN(get_ns_current_pid_tgid),
+       FN(get_ns_current_pid_tgid),    \
+       FN(xdp_output),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
index 4ba011031d4c3945fe1d4aeb955f8ec7fe075e0b..a0f688c370235d978f4d0c1a06aed3f46c04bb4c 100644 (file)
@@ -4,17 +4,51 @@
 #include "test_xdp.skel.h"
 #include "test_xdp_bpf2bpf.skel.h"
 
+/* Metadata header the BPF program prepends to each perf sample; must stay
+ * in sync with 'struct meta' in progs/test_xdp_bpf2bpf.c.
+ */
+struct meta {
+       int ifindex;
+       int pkt_len;
+};
+
+/* Perf-buffer sample callback: validate one sample (a 'struct meta' header
+ * followed by the raw IPv4 test packet) and, if everything matches, set
+ * the bool that *ctx points at (the caller's 'passed' flag).
+ */
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+       /* NOTE(review): presumably referenced inside the CHECK() macro's
+        * expansion rather than directly here -- confirm.
+        */
+       int duration = 0;
+       struct meta *meta = (struct meta *)data;
+       struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+
+       /* Sample must hold the meta header plus a full test packet. */
+       if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
+                 "check_size", "size %u < %zu\n",
+                 size, sizeof(pkt_v4) + sizeof(*meta)))
+               return;
+
+       if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
+                 "meta->ifindex = %d\n", meta->ifindex))
+               return;
+
+       /* NOTE(review): on mismatch this prints sizeof(pkt_v4) -- the
+        * expected value -- not the observed meta->pkt_len, and %zd is the
+        * signed conversion for a size_t (%zu would match). Message-only
+        * defect; the comparison itself is correct.
+        */
+       if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
+                 "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+               return;
+
+       /* Payload after the header must be the packet that was transmitted. */
+       if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
+                 "check_packet_content", "content not the same\n"))
+               return;
+
+       /* All checks passed: flip the caller's 'passed' flag. */
+       *(bool *)ctx = true;
+}
+
 void test_xdp_bpf2bpf(void)
 {
        __u32 duration = 0, retval, size;
        char buf[128];
        int err, pkt_fd, map_fd;
+       bool passed = false;
        struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
        struct iptnl_info value4 = {.family = AF_INET};
        struct test_xdp *pkt_skel = NULL;
        struct test_xdp_bpf2bpf *ftrace_skel = NULL;
        struct vip key4 = {.protocol = 6, .family = AF_INET};
        struct bpf_program *prog;
+       struct perf_buffer *pb = NULL;
+       struct perf_buffer_opts pb_opts = {};
 
        /* Load XDP program to introspect */
        pkt_skel = test_xdp__open_and_load();
@@ -50,6 +84,14 @@ void test_xdp_bpf2bpf(void)
        if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
                goto out;
 
+       /* Set up perf buffer */
+       pb_opts.sample_cb = on_sample;
+       pb_opts.ctx = &passed;
+       pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
+                             1, &pb_opts);
+       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+               goto out;
+
        /* Run test program */
        err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
                                buf, &size, &retval, &duration);
@@ -60,6 +102,15 @@ void test_xdp_bpf2bpf(void)
                  err, errno, retval, size))
                goto out;
 
+       /* Make sure bpf_xdp_output() was triggered and it sent the expected
+        * data to the perf ring buffer.
+        */
+       err = perf_buffer__poll(pb, 100);
+       if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+               goto out;
+
+       CHECK_FAIL(!passed);
+
        /* Verify test results */
        if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
                  "result", "fentry failed err %llu\n",
@@ -70,6 +121,8 @@ void test_xdp_bpf2bpf(void)
              "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
 
 out:
+       if (pb)
+               perf_buffer__free(pb);
        test_xdp__destroy(pkt_skel);
        test_xdp_bpf2bpf__destroy(ftrace_skel);
 }
index 42dd2fedd588a18ab0f8f6042ecb1b70a98b68fc..a038e827f850a077cf8ab616c6496c70fe829c37 100644 (file)
@@ -3,6 +3,8 @@
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
 
+char _license[] SEC("license") = "GPL";
+
 struct net_device {
        /* Structure does not need to contain all entries,
         * as "preserve_access_index" will use BTF to fix this...
@@ -27,10 +29,32 @@ struct xdp_buff {
        struct xdp_rxq_info *rxq;
 } __attribute__((preserve_access_index));
 
+struct meta {
+       int ifindex;
+       int pkt_len;
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
 __u64 test_result_fentry = 0;
 SEC("fentry/FUNC")
 int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
 {
+       struct meta meta;
+       void *data_end = (void *)(long)xdp->data_end;
+       void *data = (void *)(long)xdp->data;
+
+       /* Stash ifindex + packet length in the sample's metadata header. */
+       meta.ifindex = xdp->rxq->dev->ifindex;
+       meta.pkt_len = data_end - data;
+       /* The upper 32 bits of 'flags' carry the number of payload bytes
+        * to append from xdp->data after the meta blob (the helper accepts
+        * BPF_F_CTXLEN_MASK bits -- see bpf_xdp_event_output); the low
+        * bits select the ring index, here BPF_F_CURRENT_CPU.
+        * NOTE(review): the helper's return value is ignored -- presumably
+        * acceptable for a test probe, but worth confirming.
+        */
+       bpf_xdp_output(xdp, &perf_buf_map,
+                      ((__u64) meta.pkt_len << 32) |
+                      BPF_F_CURRENT_CPU,
+                      &meta, sizeof(meta));
+
        test_result_fentry = xdp->rxq->dev->ifindex;
        return 0;
 }