From 5891757aa71fbf33501528aeec9330f1c03b4b63 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Tue, 23 Jun 2015 06:53:47 -0700 Subject: [PATCH] Add vxlan tunnel monitoring example * Example simulates many vxlan hosts ("hypervisors") with multiple clients per host. Some clients will be sending traffic to another client on a different host. * Add vxlan header to proto.h * Remove unused import line in vlan_learning * NOTE: to display this demo, it is required to checkout the chord graph demo from https://github.com/drzaeus77/chord-transitions.git. Follow the instructions in that repo to get a simple http server up and running. Signed-off-by: Brenden Blanco --- examples/tunnel_monitor.c | 129 +++++++++++++++++++++++++++++++++++++++++++++ examples/tunnel_monitor.py | 80 ++++++++++++++++++++++++++++ examples/tunnel_stats.py | 69 ++++++++++++++++++++++++ examples/tunnel_traffic.sh | 38 +++++++++++++ examples/vlan_learning.py | 1 - src/cc/export/helpers.h | 3 ++ src/cc/export/proto.h | 9 ++++ 7 files changed, 328 insertions(+), 1 deletion(-) create mode 100644 examples/tunnel_monitor.c create mode 100644 examples/tunnel_monitor.py create mode 100644 examples/tunnel_stats.py create mode 100755 examples/tunnel_traffic.sh diff --git a/examples/tunnel_monitor.c b/examples/tunnel_monitor.c new file mode 100644 index 0000000..6401f3f --- /dev/null +++ b/examples/tunnel_monitor.c @@ -0,0 +1,129 @@ +#include + +struct ipkey { + u32 inner_sip; + u32 inner_dip; + u32 outer_sip; + u32 outer_dip; + u32 vni; +}; +struct counters { + u64 tx_pkts; + u64 rx_pkts; + u64 tx_bytes; + u64 rx_bytes; +}; + +BPF_TABLE("hash", struct ipkey, struct counters, stats, 1024); +BPF_TABLE("prog", int, int, parser, 10); + +enum cb_index { + CB_FLAGS = 0, + CB_SIP, + CB_DIP, + CB_VNI, + CB_OFFSET, +}; + +// helper func to swap two memory locations +static inline +void swap32(u32 *a, u32 *b) { + u32 t = *a; + *a = *b; + *b = t; +} + +// helper to swap the fields in an ipkey to give consistent ordering +static inline +void swap_ipkey(struct ipkey *key) { + swap32(&key->outer_sip, &key->outer_dip); + swap32(&key->inner_sip, &key->inner_dip); +} + +#define IS_INGRESS 0x1 +// initial handler for each packet on an ingress tc filter +int handle_ingress(struct __sk_buff *skb) { + skb->cb[CB_FLAGS] = IS_INGRESS; + parser.call(skb, 1); // jump to generic packet parser + return 1; +} + +// initial handler for each packet on an egress tc filter +int handle_egress(struct __sk_buff *skb) { + skb->cb[CB_FLAGS] = 0; + parser.call(skb, 1); // jump to generic packet parser + return 1; +} + +// parse the outer vxlan frame +int handle_outer(struct __sk_buff *skb) { + BEGIN(ethernet); + PROTO(ethernet) { + // filter bcast/mcast from the stats + if (ethernet->dst & (1ull << 40)) + return 1; + switch (ethernet->type) { + case 0x0800: goto ip; + } + goto EOP; + } + PROTO(ip) { + skb->cb[CB_SIP] = ip->src; + skb->cb[CB_DIP] = ip->dst; + switch (ip->nextp) { + case 17: goto udp; + } + goto EOP; + } + PROTO(udp) { + switch (udp->dport) { + case 4789: goto vxlan; + } + goto EOP; + } + PROTO(vxlan) { + skb->cb[CB_VNI] = vxlan->key; + skb->cb[CB_OFFSET] = (u64)vxlan + sizeof(*vxlan); + parser.call(skb, 2); + goto EOP; + } +EOP: + return 1; +} + +// Parse the inner frame, whatever it may be. If it is ipv4, add the inner +// source/dest ip to the key, for finer grained stats +int handle_inner(struct __sk_buff *skb) { + int is_ingress = skb->cb[CB_FLAGS] & IS_INGRESS; + struct ipkey key = { + .vni=skb->cb[CB_VNI], + .outer_sip = skb->cb[CB_SIP], + .outer_dip = skb->cb[CB_DIP] + }; + BEGIN_OFFSET(ethernet, skb->cb[CB_OFFSET]); + PROTO(ethernet) { + switch (ethernet->type) { + case 0x0800: goto ip; + } + goto EOP; + } + PROTO(ip) { + key.inner_sip = ip->src; + key.inner_dip = ip->dst; + goto EOP; + } +EOP: + // consistent ordering + if (key.outer_dip < key.outer_sip) + swap_ipkey(&key); + struct counters zleaf = {0}; + struct counters *leaf = stats.lookup_or_init(&key, &zleaf); + if (is_ingress) { + lock_xadd(&leaf->rx_pkts, 1); + lock_xadd(&leaf->rx_bytes, skb->len); + } else { + lock_xadd(&leaf->tx_pkts, 1); + lock_xadd(&leaf->tx_bytes, skb->len); + } + return 1; +} diff --git a/examples/tunnel_monitor.py b/examples/tunnel_monitor.py new file mode 100644 index 0000000..c97e53e --- /dev/null +++ b/examples/tunnel_monitor.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +# Copyright (c) PLUMgrid, Inc. +# Licensed under the Apache License, Version 2.0 (the "License") + +from bpf import BPF +from ctypes import c_uint, c_int, c_ulonglong, Structure +import json +from netaddr import IPAddress +from os import rename +from pyroute2 import IPRoute, NetNS, IPDB, NSPopen +import sys +from time import sleep + +ipr = IPRoute() +ipdb = IPDB(nl=ipr) + +b = BPF(src_file="tunnel_monitor.c", debug=0) +ingress_fn = b.load_func("handle_ingress", BPF.SCHED_CLS) +egress_fn = b.load_func("handle_egress", BPF.SCHED_CLS) +outer_fn = b.load_func("handle_outer", BPF.SCHED_CLS) +inner_fn = b.load_func("handle_inner", BPF.SCHED_CLS) +stats = b.get_table("stats") +parser = b.get_table("parser") +parser[c_int(1)] = c_int(outer_fn.fd) +parser[c_int(2)] = c_int(inner_fn.fd) + +ifc = ipdb.interfaces.eth0 + +# monitor one host...move this inside the netns to be more realistic +ipr.tc("add", "ingress", ifc.index, "ffff:") +ipr.tc("add-filter", "bpf", ifc.index, ":1", fd=ingress_fn.fd, + name=ingress_fn.name, parent="ffff:", action="ok", classid=1) +ipr.tc("add", "sfq", ifc.index, "1:") +ipr.tc("add-filter", "bpf", ifc.index, ":1", fd=egress_fn.fd, + name=egress_fn.name, parent="1:", action="ok", classid=1) + +def stats2json(k, v): + return { + "vni": int(k.vni), + "outer_sip": str(IPAddress(k.outer_sip)), + "outer_dip": str(IPAddress(k.outer_dip)), + "inner_sip": str(IPAddress(k.inner_sip)), + "inner_dip": str(IPAddress(k.inner_dip)), + "tx_pkts": v.tx_pkts, "tx_bytes": v.tx_bytes, + "rx_pkts": v.rx_pkts, "rx_bytes": v.rx_bytes, + } + +def delta_stats(v, oldv): + return stats.Leaf(v.tx_pkts - oldv.tx_pkts, v.rx_pkts - oldv.rx_pkts, + v.tx_bytes - oldv.tx_bytes, v.rx_bytes - oldv.rx_bytes) +def key2str(k): + return "%s,%s,%d,%s,%s" % (IPAddress(k.outer_sip), IPAddress(k.outer_dip), k.vni, + IPAddress(k.inner_sip), IPAddress(k.inner_dip)) + +prev = {} + +while True: + result_total = [] + result_delta = [] + tmp = {} + # compute both the total and last-N-seconds statistics + for k, v in stats.items(): + # subtract the previous totals from the current, or 0 if none exists + v2 = delta_stats(v, prev.get(key2str(k), stats.Leaf(0, 0, 0, 0))) + if v2.tx_pkts != 0 or v2.rx_pkts != 0: + result_delta.append(stats2json(k, v2)) + tmp[key2str(k)] = v + result_total.append(stats2json(k, v)) + + prev = tmp + + with open("/root/chord-transitions/data/tunnel.json.new", "w") as f: + json.dump(result_total, f) + rename("/root/chord-transitions/data/tunnel.json.new", "/root/chord-transitions/data/tunnel.json") + with open("/root/chord-transitions/data/tunnel-delta.json.new", "w") as f: + json.dump(result_delta, f) + rename("/root/chord-transitions/data/tunnel-delta.json.new", "/root/chord-transitions/data/tunnel-delta.json") + sleep(5) +ipdb.release() + diff --git a/examples/tunnel_stats.py b/examples/tunnel_stats.py new file mode 100644 index 0000000..10ea91b --- /dev/null +++ b/examples/tunnel_stats.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# Copyright (c) PLUMgrid, Inc. +# Licensed under the Apache License, Version 2.0 (the "License") + +from builtins import input +from netaddr import IPNetwork +from pyroute2 import IPRoute, NetNS, IPDB, NSPopen +from random import choice, randint +from simulation import Simulation +from socket import htons +import sys + +ipr = IPRoute() +ipdb = IPDB(nl=ipr) + +num_hosts = 9 +num_vnis = 4 +null = open("/dev/null", "w") + +class TunnelSimulation(Simulation): + def __init__(self, ipdb): + super(TunnelSimulation, self).__init__(ipdb) + self.available_ips = [list(IPNetwork("192.168.%d.0/24" % i)[1:-1]) + for i in range(0, num_vnis)] + + def start(self): + # each entry is tuple of ns_ipdb, out_ifc, in_ifc + host_info = [] + for i in range(0, num_hosts): + ipaddr = "172.16.1.%d/24" % (100 + i) + host_info.append(self._create_ns("host%d" % i, ipaddr=ipaddr)) + with self.ipdb.create(ifname="br100", kind="bridge") as br100: + for host in host_info: br100.add_port(host[1]) + br100.up() + # create a vxlan device inside each namespace + for host in host_info: + cmd = ["netserver", "-D"] + self.processes.append(NSPopen(host[0].nl.netns, cmd, stdout=null)) + for i in range(0, num_vnis): + with host[0].create(ifname="vxlan%d" % i, kind="vxlan", vxlan_id=10000 + i, + vxlan_link=host[0].interfaces.eth0, + vxlan_port=htons(4789), vxlan_group="239.1.1.%d" % (1 + i)) as vx: + vx.up() + with host[0].create(ifname="br%d" % i, kind="bridge") as br: + br.add_port(host[0].interfaces["vxlan%d" % i]) + br.up() + with host[0].create(ifname="c%da" % i, kind="veth", + peer="c%db" % i) as c: + c.up() + c.add_ip("%s/24" % self.available_ips[i].pop(0)) + c.mtu = 1450 + br.add_port(host[0].interfaces["c%db" % i]) + host[0].interfaces["c%db" % i].up().commit() + + # pick one host to start the monitor in + host = host_info[0] + cmd = ["python", "tunnel_monitor.py"] + p = NSPopen(host[0].nl.netns, cmd) + self.processes.append(p) + +try: + sim = TunnelSimulation(ipdb) + sim.start() + input("Press enter to quit:") +finally: + if "br100" in ipdb.interfaces: ipdb.interfaces.br100.remove().commit() + sim.release() + ipdb.release() + null.close() diff --git a/examples/tunnel_traffic.sh b/examples/tunnel_traffic.sh new file mode 100755 index 0000000..8188e92 --- /dev/null +++ b/examples/tunnel_traffic.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +B=/usr/bin/byobu +S=tunnel1 + +tmux has-session -t $S &> /dev/null + +if [[ $? != 0 ]]; then + $B new-session -s $S -n "c1" -d + tmux send -t $S "ip netns exec host0 ping 192.168.0.1 -s512" C-m + tmux new-window -t $S -n "c2" + tmux send -t $S "ip netns exec host0 ping 192.168.0.2 -s128" C-m + tmux new-window -t $S -n "c3" + tmux send -t $S "ip netns exec host0 ping 192.168.0.3 -s1024" C-m + tmux new-window -t $S -n "c3" + tmux send -t $S "ip netns exec host0 ping 192.168.0.4 -s128" C-m + tmux new-window -t $S -n "c3" + tmux send -t $S "ip netns exec host0 ping 192.168.0.5 -s128" C-m + tmux new-window -t $S -n "c3" + tmux send -t $S "ip netns exec host0 ping 192.168.0.6 -s128" C-m + tmux new-window -t $S -n "c4" + tmux send -t $S "ip netns exec host0 ping 192.168.1.2 -s128" C-m + tmux new-window -t $S -n "c5" + tmux send -t $S "ip netns exec host0 ping 192.168.1.4 -s768" C-m + tmux new-window -t $S -n "c2" + tmux send -t $S "ip netns exec host0 ping 192.168.2.2 -s128" C-m + tmux new-window -t $S -n "c3" + tmux send -t $S "ip netns exec host0 ping 192.168.2.7 -s1024" C-m + tmux new-window -t $S -n "c4" + tmux send -t $S "ip netns exec host0 ping 192.168.2.2 -s128" C-m + tmux new-window -t $S -n "c5" + tmux send -t $S "ip netns exec host0 ping 192.168.3.8 -s768" C-m + tmux new-window -t $S -n "c5" + tmux send -t $S "ip netns exec host0 ping 192.168.3.9 -s768" C-m +fi + +exec tmux attach -t $S + diff --git a/examples/vlan_learning.py b/examples/vlan_learning.py index 4ae9c8a..e4e5f50 100644 --- a/examples/vlan_learning.py +++ b/examples/vlan_learning.py @@ -25,7 +25,6 @@ from bpf import BPF from builtins import input -from ctypes import c_uint, c_int, c_ulonglong, Structure from pyroute2 import IPRoute, NetNS, IPDB, NSPopen from random import shuffle from time import sleep diff --git a/src/cc/export/helpers.h b/src/cc/export/helpers.h index 8bfa54f..2328600 100644 --- a/src/cc/export/helpers.h +++ b/src/cc/export/helpers.h @@ -45,6 +45,9 @@ struct _name##_table_t _name #define BEGIN(next) \ u64 _parse_cursor = 0; \ goto next +#define BEGIN_OFFSET(next, _base_offset) \ + u64 _parse_cursor = (_base_offset); \ + goto next #define PROTO(name) \ goto EOP; \ diff --git a/src/cc/export/proto.h b/src/cc/export/proto.h index c6de80e..408b64b 100644 --- a/src/cc/export/proto.h +++ b/src/cc/export/proto.h @@ -82,3 +82,12 @@ struct tcp_t { unsigned short cksum; // byte 16 unsigned short urg_ptr; } __attribute__((packed)); + +struct vxlan_t { + unsigned int rsv1:4; + unsigned int iflag:1; + unsigned int rsv2:3; + unsigned int rsv3:24; + unsigned int key:24; + unsigned int rsv4:8; +} __attribute__((packed)); -- 2.7.4