This adds XDP support to BCC as currently supported in net-next.

author Jan Rüth <rueth@comsys.rwth-aachen.de>

Thu, 28 Jul 2016 20:32:46 +0000 (22:32 +0200)

committer Jan Rüth <rueth@comsys.rwth-aachen.de>

Thu, 28 Jul 2016 20:40:06 +0000 (22:40 +0200)
author Jan Rüth <rueth@comsys.rwth-aachen.de>
Thu, 28 Jul 2016 20:32:46 +0000 (22:32 +0200)
committer Jan Rüth <rueth@comsys.rwth-aachen.de>
Thu, 28 Jul 2016 20:40:06 +0000 (22:40 +0200)
diff --git a/src/cc/compat/linux/bpf.h b/src/cc/compat/linux/bpf.h

index 23917bb..da218fe 100644 (file)
--- a/src/cc/compat/linux/bpf.h
+++ b/src/cc/compat/linux/bpf.h
@@ -84,6 +84,7 @@ enum bpf_map_type {
         BPF_MAP_TYPE_PERCPU_HASH,
         BPF_MAP_TYPE_PERCPU_ARRAY,
         BPF_MAP_TYPE_STACK_TRACE,
+       BPF_MAP_TYPE_CGROUP_ARRAY,
  };
  
  enum bpf_prog_type {
@@ -92,6 +93,8 @@ enum bpf_prog_type {
         BPF_PROG_TYPE_KPROBE,
         BPF_PROG_TYPE_SCHED_CLS,
         BPF_PROG_TYPE_SCHED_ACT,
+       BPF_PROG_TYPE_TRACEPOINT,
+       BPF_PROG_TYPE_XDP,
  };
  
  #define BPF_PSEUDO_MAP_FD      1
@@ -312,6 +315,66 @@ enum bpf_func_id {
          */
         BPF_FUNC_skb_get_tunnel_opt,
         BPF_FUNC_skb_set_tunnel_opt,
+
+       /**
+        * bpf_skb_change_proto(skb, proto, flags)
+        * Change protocol of the skb. Currently supported is
+        * v4 -> v6, v6 -> v4 transitions. The helper will also
+        * resize the skb. eBPF program is expected to fill the
+        * new headers via skb_store_bytes and lX_csum_replace.
+        * @skb: pointer to skb
+        * @proto: new skb->protocol type
+        * @flags: reserved
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_proto,
+
+       /**
+        * bpf_skb_change_type(skb, type)
+        * Change packet type of skb.
+        * @skb: pointer to skb
+        * @type: new skb->pkt_type type
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_type,
+
+       /**
+        * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+        * @skb: pointer to skb
+        * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+        * @index: index of the cgroup in the bpf_map
+        * Return:
+        *   == 0 skb failed the cgroup2 descendant test
+        *   == 1 skb succeeded the cgroup2 descendant test
+        *    < 0 error
+        */
+       BPF_FUNC_skb_in_cgroup,
+
+       /**
+        * bpf_get_hash_recalc(skb)
+        * Retrieve and possibly recalculate skb->hash.
+        * @skb: pointer to skb
+        * Return: hash
+        */
+       BPF_FUNC_get_hash_recalc,
+
+       /**
+        * u64 bpf_get_current_task(void)
+        * Returns current task_struct
+        * Return: current
+        */
+       BPF_FUNC_get_current_task,
+
+       /**
+        * bpf_probe_write_user(void *dst, void *src, int len)
+        * safely attempt to write to a location
+        * @dst: destination address in userspace
+        * @src: source address on stack
+        * @len: number of bytes to copy
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_probe_write_user,
+
         __BPF_FUNC_MAX_ID,
  };
  
@@ -346,6 +409,12 @@ enum bpf_func_id {
  #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
  #define BPF_F_DONT_FRAGMENT            (1ULL << 2)
  
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags.
+ * BPF_F_INDEX_MASK selects the low 32 bits of the flags value;
+ * BPF_F_CURRENT_CPU is defined as that same all-ones 32-bit value.
+ */
+#define BPF_F_INDEX_MASK               0xffffffffULL
+#define BPF_F_CURRENT_CPU              BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context.
+ * The mask is 20 bits wide and occupies bits 32..51 of the flags value.
+ */
+#define BPF_F_CTXLEN_MASK              (0xfffffULL << 32)
+
  /* user accessible mirror of in-kernel sk_buff.
   * new fields can only be added to the end of this structure
   */
@@ -365,6 +434,8 @@ struct __sk_buff {
         __u32 cb[5];
         __u32 hash;
         __u32 tc_classid;
+       __u32 data;
+       __u32 data_end;
  };
  
  struct bpf_tunnel_key {
@@ -379,4 +450,24 @@ struct bpf_tunnel_key {
         __u32 tunnel_label;
  };
  
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ * Only the first enumerator has an explicit value; the rest follow on
+ * consecutively (numeric values noted beside each one).
+ */
+enum xdp_action {
+       XDP_ABORTED = 0,
+       XDP_DROP,       /* 1 */
+       XDP_PASS,       /* 2 */
+       XDP_TX,         /* 3 */
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ * Both fields are 32-bit values, named like the data/data_end fields this
+ * same patch appends to struct __sk_buff.
+ */
+struct xdp_md {
+       __u32 data;     /* NOTE(review): presumably refers to the start of the packet data - confirm */
+       __u32 data_end; /* NOTE(review): presumably refers to the end of the packet data - confirm */
+};
+
  #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h

index 60f906b..227b17f 100644 (file)
--- a/src/cc/compat/linux/virtual_bpf.h
+++ b/src/cc/compat/linux/virtual_bpf.h
@@ -85,6 +85,7 @@ enum bpf_map_type {
         BPF_MAP_TYPE_PERCPU_HASH,
         BPF_MAP_TYPE_PERCPU_ARRAY,
         BPF_MAP_TYPE_STACK_TRACE,
+       BPF_MAP_TYPE_CGROUP_ARRAY,
  };
  
  enum bpf_prog_type {
@@ -93,6 +94,8 @@ enum bpf_prog_type {
         BPF_PROG_TYPE_KPROBE,
         BPF_PROG_TYPE_SCHED_CLS,
         BPF_PROG_TYPE_SCHED_ACT,
+       BPF_PROG_TYPE_TRACEPOINT,
+       BPF_PROG_TYPE_XDP,
  };
  
  #define BPF_PSEUDO_MAP_FD      1
@@ -313,6 +316,66 @@ enum bpf_func_id {
          */
         BPF_FUNC_skb_get_tunnel_opt,
         BPF_FUNC_skb_set_tunnel_opt,
+
+       /**
+        * bpf_skb_change_proto(skb, proto, flags)
+        * Change protocol of the skb. Currently supported is
+        * v4 -> v6, v6 -> v4 transitions. The helper will also
+        * resize the skb. eBPF program is expected to fill the
+        * new headers via skb_store_bytes and lX_csum_replace.
+        * @skb: pointer to skb
+        * @proto: new skb->protocol type
+        * @flags: reserved
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_proto,
+
+       /**
+        * bpf_skb_change_type(skb, type)
+        * Change packet type of skb.
+        * @skb: pointer to skb
+        * @type: new skb->pkt_type type
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_skb_change_type,
+
+       /**
+        * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+        * @skb: pointer to skb
+        * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+        * @index: index of the cgroup in the bpf_map
+        * Return:
+        *   == 0 skb failed the cgroup2 descendant test
+        *   == 1 skb succeeded the cgroup2 descendant test
+        *    < 0 error
+        */
+       BPF_FUNC_skb_in_cgroup,
+
+       /**
+        * bpf_get_hash_recalc(skb)
+        * Retrieve and possibly recalculate skb->hash.
+        * @skb: pointer to skb
+        * Return: hash
+        */
+       BPF_FUNC_get_hash_recalc,
+
+       /**
+        * u64 bpf_get_current_task(void)
+        * Returns current task_struct
+        * Return: current
+        */
+       BPF_FUNC_get_current_task,
+
+       /**
+        * bpf_probe_write_user(void *dst, void *src, int len)
+        * safely attempt to write to a location
+        * @dst: destination address in userspace
+        * @src: source address on stack
+        * @len: number of bytes to copy
+        * Return: 0 on success or negative error
+        */
+       BPF_FUNC_probe_write_user,
+
         __BPF_FUNC_MAX_ID,
  };
  
@@ -347,6 +410,12 @@ enum bpf_func_id {
  #define BPF_F_ZERO_CSUM_TX             (1ULL << 1)
  #define BPF_F_DONT_FRAGMENT            (1ULL << 2)
  
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
+#define BPF_F_INDEX_MASK               0xffffffffULL
+#define BPF_F_CURRENT_CPU              BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK              (0xfffffULL << 32)
+
  /* user accessible mirror of in-kernel sk_buff.
   * new fields can only be added to the end of this structure
   */
@@ -366,6 +435,8 @@ struct __sk_buff {
         __u32 cb[5];
         __u32 hash;
         __u32 tc_classid;
+       __u32 data;
+       __u32 data_end;
  };
  
  struct bpf_tunnel_key {
@@ -380,5 +451,25 @@ struct bpf_tunnel_key {
         __u32 tunnel_label;
  };
  
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+       XDP_ABORTED = 0,
+       XDP_DROP,
+       XDP_PASS,
+       XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+       __u32 data;
+       __u32 data_end;
+};
+
  #endif /* _UAPI__LINUX_BPF_H__ */
  )********"
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c

index 93b157e..0068e1f 100644 (file)
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -408,3 +408,104 @@ error:
  
    return NULL;
  }
+
+
+
+/*
+ * Attach the BPF program referred to by progfd to the network device named
+ * dev_name at the XDP hook.
+ *
+ * The request is an RTM_SETLINK netlink message carrying a nested IFLA_XDP
+ * attribute that holds a single IFLA_XDP_FD attribute with the program fd.
+ * The function waits for the kernel's acknowledgement and checks that the
+ * reply belongs to this request (matching port id and sequence number).
+ *
+ * @dev_name: name of the network interface, resolved with if_nametoindex()
+ * @progfd:   file descriptor of the program to attach; the Python binding
+ *            passes -1 here to remove a previously attached program
+ *
+ * Returns 0 on success and -1 on failure. On failure errno is meant to
+ * describe the cause: EBADMSG for a reply that does not match the request,
+ * otherwise the error reported by the failing call or by the kernel in its
+ * NLMSG_ERROR reply. errno is preserved across the final close().
+ */
+int bpf_attach_xdp(const char *dev_name, int progfd) {
+    struct sockaddr_nl sa;
+    socklen_t addrlen;
+    int sock, seq = 0, len, ret = -1, saved_errno;
+    char buf[4096];
+    struct nlattr *nla, *nla_xdp;
+    struct {
+        struct nlmsghdr  nh;
+        struct ifinfomsg ifinfo;
+        char             attrbuf[64];
+    } req;
+    struct nlmsghdr *nh;
+    struct nlmsgerr *err;
+
+    memset(&sa, 0, sizeof(sa));
+    sa.nl_family = AF_NETLINK;
+
+    sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+    if (sock < 0) {
+        fprintf(stderr, "bpf: opening a netlink socket: %s\n", strerror(errno));
+        return -1;
+    }
+
+    if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+        fprintf(stderr, "bpf: bind to netlink: %s\n", strerror(errno));
+        goto cleanup;
+    }
+
+    // nl_pid was left at 0, so the kernel picked the port id for us. It only
+    // equals getpid() for the first netlink socket of a process, therefore
+    // ask the socket which id it actually got instead of assuming the pid.
+    addrlen = sizeof(sa);
+    if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+        fprintf(stderr, "bpf: get sock name of netlink: %s\n", strerror(errno));
+        goto cleanup;
+    }
+    if (addrlen != sizeof(sa)) {
+        fprintf(stderr, "bpf: wrong netlink address length: %u\n",
+               (unsigned int)addrlen);
+        errno = EBADMSG;
+        goto cleanup;
+    }
+
+    memset(&req, 0, sizeof(req));
+    req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+    req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+    req.nh.nlmsg_type = RTM_SETLINK;
+    req.nh.nlmsg_pid = 0;
+    req.nh.nlmsg_seq = ++seq;
+    req.ifinfo.ifi_family = AF_UNSPEC;
+    req.ifinfo.ifi_index = if_nametoindex(dev_name);
+    if (req.ifinfo.ifi_index == 0) {
+        fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
+        goto cleanup;
+    }
+
+    // the outer (nested) attribute starts right after the aligned ifinfomsg
+    nla = (struct nlattr *)(((char *)&req)
+                            + NLMSG_ALIGN(req.nh.nlmsg_len));
+    nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+
+    // the inner attribute is the payload of the outer one
+    nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
+
+    // we specify the FD passed over by the user
+    nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+    nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+    memcpy((char *)nla_xdp + NLA_HDRLEN, &progfd, sizeof(progfd));
+    nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
+
+    req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+    if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+        fprintf(stderr, "bpf: send to netlink: %s\n", strerror(errno));
+        goto cleanup;
+    }
+
+    len = recv(sock, buf, sizeof(buf), 0);
+    if (len < 0) {
+        fprintf(stderr, "bpf: recv from netlink: %s\n", strerror(errno));
+        goto cleanup;
+    }
+
+    for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+         nh = NLMSG_NEXT(nh, len)) {
+        // replies are addressed to the port id of this socket
+        if (nh->nlmsg_pid != sa.nl_pid) {
+            fprintf(stderr, "bpf: Wrong pid %u, expected %u\n",
+                   nh->nlmsg_pid, sa.nl_pid);
+            errno = EBADMSG;
+            goto cleanup;
+        }
+        if (nh->nlmsg_seq != seq) {
+            fprintf(stderr, "bpf: Wrong seq %d, expected %d\n",
+                   nh->nlmsg_seq, seq);
+            errno = EBADMSG;
+            goto cleanup;
+        }
+        switch (nh->nlmsg_type) {
+            case NLMSG_ERROR:
+                err = (struct nlmsgerr *)NLMSG_DATA(nh);
+                // an error code of 0 is the positive acknowledgement
+                if (!err->error)
+                    continue;
+                fprintf(stderr, "bpf: nlmsg error %s\n", strerror(-err->error));
+                errno = -err->error;
+                goto cleanup;
+            case NLMSG_DONE:
+                break;
+        }
+    }
+
+    ret = 0;
+
+cleanup:
+    // close() may overwrite errno; keep the value the caller will inspect
+    saved_errno = errno;
+    close(sock);
+    errno = saved_errno;
+    return ret;
+}
diff --git a/src/libbpf.h b/src/libbpf.h

index 9b1844f..de72ff8 100644 (file)
--- a/src/libbpf.h
+++ b/src/libbpf.h
@@ -61,6 +61,9 @@ int bpf_detach_tracepoint(const char *tp_category, const char *tp_name);
  
  void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid, int cpu);
  
+/* Attach the prog expressed by progfd to the device specified in dev_name
+ * as an XDP program. Returns 0 on success and -1 on failure.
+ * The Python binding's remove_xdp() calls this with progfd = -1 to detach.
+ */
+int bpf_attach_xdp(const char *dev_name, int progfd);
+
  #define LOG_BUF_SIZE 65536
  extern char bpf_log_buf[LOG_BUF_SIZE];
  
diff --git a/src/python/bcc/__init__.py b/src/python/bcc/__init__.py

index 6b4259e..5c6ae1e 100644 (file)
--- a/src/python/bcc/__init__.py
+++ b/src/python/bcc/__init__.py
@@ -21,6 +21,7 @@ import multiprocessing
  import os
  import re
  import struct
+import errno
  import sys
  basestring = (unicode if sys.version_info[0] < 3 else str)
  
@@ -67,6 +68,7 @@ class BPF(object):
      SCHED_CLS = 3
      SCHED_ACT = 4
      TRACEPOINT = 5
+    XDP = 6
  
      _probe_repl = re.compile("[^a-zA-Z0-9_]")
      _sym_caches = {}
@@ -439,6 +441,39 @@ class BPF(object):
              raise Exception("Failed to detach BPF from kprobe")
          self._del_kprobe(ev_name)
  
+    @staticmethod
+    def attach_xdp(dev, fn):
+        '''
+            This function attaches a BPF function to a device on the device
+            driver level (XDP)
+
+            dev is the device name; it is ASCII-encoded before being handed
+            to the library. fn must be a BPF.Function, whose fd is attached.
+
+            Raises Exception if fn has the wrong type or if the library
+            reports a failure.
+        '''
+        if not isinstance(fn, BPF.Function):
+            raise Exception("arg 1 must be of type BPF.Function")
+        res = lib.bpf_attach_xdp(dev.encode("ascii"), fn.fd)
+        if res >= 0:
+            return
+        # NOTE(review): ct.get_errno() only mirrors the C errno when the
+        # library was loaded with use_errno=True - confirm in libbcc.py
+        err_no = ct.get_errno()
+        # EBADMSG is what bpf_attach_xdp sets when the netlink reply did not
+        # match the request, i.e. not an error the user caused
+        if err_no == errno.EBADMSG:
+            raise Exception("Internal error while attaching BPF to device,"
+                " try increasing the debug level!")
+        raise Exception("Failed to attach BPF to device %s: %s"
+                        % (dev, os.strerror(err_no)))
+
+    @staticmethod
+    def remove_xdp(dev):
+        '''
+            Detach whatever BPF function is attached to a device on the
+            device driver level (XDP) by calling the attach routine with
+            a program fd of -1. Raises Exception if the library call fails.
+        '''
+        if lib.bpf_attach_xdp(dev.encode("ascii"), -1) >= 0:
+            return
+        reason = os.strerror(ct.get_errno())
+        raise Exception("Failed to detach BPF from device %s: %s"
+                        % (dev, reason))
+
+
+
      @classmethod
      def _check_path_symbol(cls, module, symname, addr):
          sym = bcc_symbol()
diff --git a/src/python/bcc/libbcc.py b/src/python/bcc/libbcc.py

index c847c00..59509c2 100644 (file)
--- a/src/python/bcc/libbcc.py
+++ b/src/python/bcc/libbcc.py
@@ -108,6 +108,9 @@ lib.perf_reader_free.argtypes = [ct.c_void_p]
  lib.perf_reader_fd.restype = int
  lib.perf_reader_fd.argtypes = [ct.c_void_p]
  
+# int bpf_attach_xdp(const char *dev_name, int progfd)
+lib.bpf_attach_xdp.restype = ct.c_int
+lib.bpf_attach_xdp.argtypes = [ct.c_char_p, ct.c_int]
+
  # bcc symbol helpers
  class bcc_symbol(ct.Structure):
      _fields_ = [
author	Jan Rüth <rueth@comsys.rwth-aachen.de>
	Thu, 28 Jul 2016 20:32:46 +0000 (22:32 +0200)
committer	Jan Rüth <rueth@comsys.rwth-aachen.de>
	Thu, 28 Jul 2016 20:40:06 +0000 (22:40 +0200)
src/cc/compat/linux/bpf.h		patch \| blob \| history
src/cc/compat/linux/virtual_bpf.h		patch \| blob \| history
src/cc/libbpf.c		patch \| blob \| history
src/libbpf.h		patch \| blob \| history
src/python/bcc/__init__.py		patch \| blob \| history
src/python/bcc/libbcc.py		patch \| blob \| history