--- /dev/null
+++ b/drivers/net/hyperv/netvsc_bpf.c
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019, Microsoft Corporation.
+ *
+ * Author:
+ * Haiyang Zhang <haiyangz@microsoft.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/kernel.h>
+#include <net/xdp.h>
+
+#include <linux/mutex.h>
+#include <linux/rtnetlink.h>
+
+#include "hyperv_net.h"
+
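+/* Run the channel's XDP program, if any, on the first fragment of the
+ * incoming packet. The data is copied into a freshly allocated page so
+ * the program gets a writable buffer with NETVSC_XDP_HDRM bytes of
+ * headroom. The page is kept only for XDP_PASS and XDP_TX verdicts
+ * (xdp->data_hard_start then points at it) and is freed otherwise.
+ * With no program attached, returns XDP_PASS with data_hard_start NULL.
+ */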
+u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
+ struct xdp_buff *xdp)
+{
+ void *data = nvchan->rsc.data[0];
+ u32 len = nvchan->rsc.len[0];
+ struct page *page = NULL;
+ struct bpf_prog *prog;
+ u32 act = XDP_PASS;
+
+ xdp->data_hard_start = NULL;
+
+ rcu_read_lock();
+ prog = rcu_dereference(nvchan->bpf_prog);
+
+ if (!prog)
+ goto out;
+
+ /* allocate page buffer for data */
+ page = alloc_page(GFP_ATOMIC);
+ if (!page) {
+ act = XDP_DROP;
+ goto out;
+ }
+
+ xdp->data_hard_start = page_address(page);
+ xdp->data = xdp->data_hard_start + NETVSC_XDP_HDRM;
+ xdp_set_data_meta_invalid(xdp);
+ xdp->data_end = xdp->data + len;
+ xdp->rxq = &nvchan->xdp_rxq;
+ xdp->handle = 0;
+
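+	/* Copy the packet out of the host receive buffer, which cannot
+	 * be modified in place.
+	 */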
+ memcpy(xdp->data, data, len);
+
+ act = bpf_prog_run_xdp(prog, xdp);
+
+ switch (act) {
+ case XDP_PASS:
+ case XDP_TX:
+ case XDP_DROP:
+ break;
+
+ case XDP_ABORTED:
+ trace_xdp_exception(ndev, prog, act);
+ break;
+
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ }
+
+out:
+ rcu_read_unlock();
+
+ if (page && act != XDP_PASS && act != XDP_TX) {
+ __free_page(page);
+ xdp->data_hard_start = NULL;
+ }
+
+ return act;
+}
+
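+/* Buffer size that build_skb() needs to hold @len bytes of data: the
+ * data area rounded up for alignment, plus the trailing skb_shared_info.
+ */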
+unsigned int netvsc_xdp_fraglen(unsigned int len)
+{
+ return SKB_DATA_ALIGN(len) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
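+/* Return the XDP program shared by all channels; requires RTNL.
+ * Channel 0's slot is authoritative since netvsc_xdp_set() installs
+ * the same program on every channel.
+ */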
+struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev)
+{
+ return rtnl_dereference(nvdev->chan_table[0].bpf_prog);
+}
+
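+/* Install @prog on every channel of @nvdev, releasing references to
+ * any previously attached program. The program is refused if an
+ * MTU-sized buffer plus headroom and skb_shared_info would not fit in
+ * a single page, or if LRO is enabled (aggregated packets would not
+ * fit the XDP buffer).
+ */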
+int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack,
+ struct netvsc_device *nvdev)
+{
+ struct bpf_prog *old_prog;
+ int buf_max, i;
+
+ old_prog = netvsc_xdp_get(nvdev);
+
+ if (!old_prog && !prog)
+ return 0;
+
+ buf_max = NETVSC_XDP_HDRM + netvsc_xdp_fraglen(dev->mtu + ETH_HLEN);
+ if (prog && buf_max > PAGE_SIZE) {
+ netdev_err(dev, "XDP: mtu:%u too large, buf_max:%u\n",
+ dev->mtu, buf_max);
+ NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large");
+
+ return -EOPNOTSUPP;
+ }
+
+	if (prog && (dev->features & NETIF_F_LRO)) {
+		netdev_err(dev, "XDP: not supported with LRO enabled\n");
+		NL_SET_ERR_MSG_MOD(extack, "XDP: not supported with LRO");
+
+		return -EOPNOTSUPP;
+	}
+
+	/* The caller holds one reference to prog, which transfers to the
+	 * channel table; take num_chn - 1 more for the remaining slots.
+	 */
+	if (prog)
+		bpf_prog_add(prog, nvdev->num_chn - 1);
+
+ for (i = 0; i < nvdev->num_chn; i++)
+ rcu_assign_pointer(nvdev->chan_table[i].bpf_prog, prog);
+
+ if (old_prog)
+ for (i = 0; i < nvdev->num_chn; i++)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
+
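+/* Mirror the synthetic NIC's XDP program onto the VF netdev, so the
+ * program also sees packets arriving over the accelerated (VF) path.
+ */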
+int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
+{
+	struct netdev_bpf xdp;
+	bpf_op_t ndo_bpf;
+	int ret;
+
+ ASSERT_RTNL();
+
+ if (!vf_netdev)
+ return 0;
+
+ ndo_bpf = vf_netdev->netdev_ops->ndo_bpf;
+ if (!ndo_bpf)
+ return 0;
+
+	memset(&xdp, 0, sizeof(xdp));
+	xdp.command = XDP_SETUP_PROG;
+	xdp.prog = prog;
+
+	/* The VF's ndo_bpf consumes one reference to prog on success */
+	if (prog)
+		bpf_prog_inc(prog);
+
+	ret = ndo_bpf(vf_netdev, &xdp);
+	if (ret && prog)
+		bpf_prog_put(prog);
+	return ret;
+}
+
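+/* Return the ID of the attached program for XDP_QUERY_PROG, 0 if none. */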
+static u32 netvsc_xdp_query(struct netvsc_device *nvdev)
+{
+ struct bpf_prog *prog = netvsc_xdp_get(nvdev);
+
+ if (prog)
+ return prog->aux->id;
+
+ return 0;
+}
+
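+/* ndo_bpf handler. For XDP_SETUP_PROG the program is applied to the
+ * synthetic NIC first and then mirrored to the VF; if the VF rejects
+ * it, the synthetic NIC is rolled back to no program.
+ */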
+int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ struct net_device_context *ndevctx = netdev_priv(dev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+ struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
+ struct netlink_ext_ack *extack = bpf->extack;
+ int ret;
+
+ if (!nvdev || nvdev->destroy) {
+ if (bpf->command == XDP_QUERY_PROG) {
+ bpf->prog_id = 0;
+ return 0; /* Query must always succeed */
+ } else {
+ return -ENODEV;
+ }
+ }
+
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ ret = netvsc_xdp_set(dev, bpf->prog, extack, nvdev);
+
+ if (ret)
+ return ret;
+
+ ret = netvsc_vf_setxdp(vf_netdev, bpf->prog);
+
+ if (ret) {
+ netdev_err(dev, "vf_setxdp failed:%d\n", ret);
+ NL_SET_ERR_MSG_MOD(extack, "vf_setxdp failed");
+
+ netvsc_xdp_set(dev, NULL, extack, nvdev);
+ }
+
+ return ret;
+
+ case XDP_QUERY_PROG:
+ bpf->prog_id = netvsc_xdp_query(nvdev);
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
#include <linux/slab.h>
#include <linux/rtnetlink.h>
#include <linux/netpoll.h>
+#include <linux/bpf.h>
#include <net/arp.h>
#include <net/route.h>
return rc;
}
-static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
+static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_netvsc_packet *packet = NULL;
/* timestamp packet in software */
skb_tx_timestamp(skb);
- ret = netvsc_send(net, packet, rndis_msg, pb, skb);
+ ret = netvsc_send(net, packet, rndis_msg, pb, skb, xdp_tx);
if (likely(ret == 0))
return NETDEV_TX_OK;
goto drop;
}
+static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+ return netvsc_xmit(skb, ndev, false);
+}
+
/*
* netvsc_linkstatus_callback - Link up/down notification
*/
schedule_delayed_work(&ndev_ctx->dwork, 0);
}
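+/* Transmit an skb on behalf of XDP_TX: keep it on the queue it was
+ * received on, restore the Ethernet header that eth_type_trans()
+ * pulled, and send it through the regular transmit path.
+ */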
+static void netvsc_xdp_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+ int rc;
+
+ skb->queue_mapping = skb_get_rx_queue(skb);
+ __skb_push(skb, ETH_HLEN);
+
+ rc = netvsc_xmit(skb, ndev, true);
+
+ if (dev_xmit_complete(rc))
+ return;
+
+ dev_kfree_skb_any(skb);
+ ndev->stats.tx_dropped++;
+}
+
static void netvsc_comp_ipcsum(struct sk_buff *skb)
{
struct iphdr *iph = (struct iphdr *)skb->data;
}
static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
- struct netvsc_channel *nvchan)
+ struct netvsc_channel *nvchan,
+ struct xdp_buff *xdp)
{
struct napi_struct *napi = &nvchan->napi;
const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan;
nvchan->rsc.csum_info;
const u32 *hash_info = nvchan->rsc.hash_info;
struct sk_buff *skb;
+ void *xbuf = xdp->data_hard_start;
int i;
- skb = napi_alloc_skb(napi, nvchan->rsc.pktlen);
- if (!skb)
- return skb;
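+	/* If XDP ran, the packet already sits in the XDP page buffer;
+	 * build the skb around that page instead of copying again.
+	 */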
+ if (xbuf) {
+ unsigned int hdroom = xdp->data - xdp->data_hard_start;
+ unsigned int xlen = xdp->data_end - xdp->data;
+ unsigned int frag_size = netvsc_xdp_fraglen(hdroom + xlen);
- /*
- * Copy to skb. This copy is needed here since the memory pointed by
- * hv_netvsc_packet cannot be deallocated
- */
- for (i = 0; i < nvchan->rsc.cnt; i++)
- skb_put_data(skb, nvchan->rsc.data[i], nvchan->rsc.len[i]);
+ skb = build_skb(xbuf, frag_size);
+
+ if (!skb) {
+ __free_page(virt_to_page(xbuf));
+ return NULL;
+ }
+
+ skb_reserve(skb, hdroom);
+ skb_put(skb, xlen);
+ skb->dev = napi->dev;
+ } else {
+ skb = napi_alloc_skb(napi, nvchan->rsc.pktlen);
+
+ if (!skb)
+ return NULL;
+
+ /* Copy to skb. This copy is needed here since the memory
+ * pointed by hv_netvsc_packet cannot be deallocated.
+ */
+ for (i = 0; i < nvchan->rsc.cnt; i++)
+ skb_put_data(skb, nvchan->rsc.data[i],
+ nvchan->rsc.len[i]);
+ }
skb->protocol = eth_type_trans(skb, net);
struct vmbus_channel *channel = nvchan->channel;
u16 q_idx = channel->offermsg.offer.sub_channel_index;
struct sk_buff *skb;
- struct netvsc_stats *rx_stats;
+ struct netvsc_stats *rx_stats = &nvchan->rx_stats;
+ struct xdp_buff xdp;
+ u32 act;
if (net->reg_state != NETREG_REGISTERED)
return NVSP_STAT_FAIL;
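+	/* Let the XDP program, if any, inspect the packet first; any
+	 * verdict other than PASS or TX means it was consumed here.
+	 */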
+ act = netvsc_run_xdp(net, nvchan, &xdp);
+
+ if (act != XDP_PASS && act != XDP_TX) {
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->xdp_drop++;
+ u64_stats_update_end(&rx_stats->syncp);
+
+ return NVSP_STAT_SUCCESS; /* consumed by XDP */
+ }
+
/* Allocate a skb - TODO direct I/O to pages? */
- skb = netvsc_alloc_recv_skb(net, nvchan);
+ skb = netvsc_alloc_recv_skb(net, nvchan, &xdp);
if (unlikely(!skb)) {
++net_device_ctx->eth_stats.rx_no_memory;
* on the synthetic device because modifying the VF device
* statistics will not work correctly.
*/
- rx_stats = &nvchan->rx_stats;
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->packets++;
rx_stats->bytes += nvchan->rsc.pktlen;
++rx_stats->multicast;
u64_stats_update_end(&rx_stats->syncp);
+ if (act == XDP_TX) {
+ netvsc_xdp_xmit(skb, net);
+ return NVSP_STAT_SUCCESS;
+ }
+
napi_gro_receive(&nvchan->napi, skb);
return NVSP_STAT_SUCCESS;
}
/* Alloc struct netvsc_device_info, and initialize it from either existing
* struct netvsc_device, or from default values.
*/
-static struct netvsc_device_info *netvsc_devinfo_get
- (struct netvsc_device *nvdev)
+static
+struct netvsc_device_info *netvsc_devinfo_get(struct netvsc_device *nvdev)
{
struct netvsc_device_info *dev_info;
+ struct bpf_prog *prog;
dev_info = kzalloc(sizeof(*dev_info), GFP_ATOMIC);
return NULL;
if (nvdev) {
+ ASSERT_RTNL();
+
dev_info->num_chn = nvdev->num_chn;
dev_info->send_sections = nvdev->send_section_cnt;
dev_info->send_section_size = nvdev->send_section_size;
memcpy(dev_info->rss_key, nvdev->extension->rss_key,
NETVSC_HASH_KEYLEN);
+
+ prog = netvsc_xdp_get(nvdev);
+ if (prog) {
+ bpf_prog_inc(prog);
+ dev_info->bprog = prog;
+ }
} else {
dev_info->num_chn = VRSS_CHANNEL_DEFAULT;
dev_info->send_sections = NETVSC_DEFAULT_TX;
return dev_info;
}
+/* Free struct netvsc_device_info */
+static void netvsc_devinfo_put(struct netvsc_device_info *dev_info)
+{
+ if (dev_info->bprog) {
+ ASSERT_RTNL();
+ bpf_prog_put(dev_info->bprog);
+ }
+
+ kfree(dev_info);
+}
+
static int netvsc_detach(struct net_device *ndev,
struct netvsc_device *nvdev)
{
if (cancel_work_sync(&nvdev->subchan_work))
nvdev->num_chn = 1;
+ netvsc_xdp_set(ndev, NULL, NULL, nvdev);
+
/* If device was up (receiving) then shutdown */
if (netif_running(ndev)) {
netvsc_tx_disable(nvdev, ndev);
struct hv_device *hdev = ndev_ctx->device_ctx;
struct netvsc_device *nvdev;
struct rndis_device *rdev;
- int ret;
+ struct bpf_prog *prog;
+ int ret = 0;
nvdev = rndis_filter_device_add(hdev, dev_info);
if (IS_ERR(nvdev))
}
}
+	prog = dev_info->bprog;
+	if (prog) {
+		/* dev_info keeps its own reference; take one more for
+		 * netvsc_xdp_set() to transfer to the channel table.
+		 */
+		bpf_prog_inc(prog);
+		ret = netvsc_xdp_set(ndev, prog, NULL, nvdev);
+		if (ret) {
+			bpf_prog_put(prog);
+			goto err1;
+		}
+	}
+
/* In any case device is now ready */
netif_device_attach(ndev);
if (netif_running(ndev)) {
ret = rndis_filter_open(nvdev);
if (ret)
- goto err;
+ goto err2;
rdev = nvdev->extension;
if (!rdev->link_state)
return 0;
-err:
+err2:
netif_device_detach(ndev);
+err1:
rndis_filter_device_remove(hdev, nvdev);
return ret;
}
out:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return ret;
}
dev_set_mtu(vf_netdev, orig_mtu);
out:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return ret;
}
/* statistics per queue (rx/tx packets/bytes) */
#define NETVSC_PCPU_STATS_LEN (num_present_cpus() * ARRAY_SIZE(pcpu_stats))
-/* 4 statistics per queue (rx/tx packets/bytes) */
-#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
+/* 5 statistics per queue (rx/tx packets/bytes, rx xdp_drop) */
+#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 5)
static int netvsc_get_sset_count(struct net_device *dev, int string_set)
{
struct netvsc_ethtool_pcpu_stats *pcpu_sum;
unsigned int start;
u64 packets, bytes;
+ u64 xdp_drop;
int i, j, cpu;
if (!nvdev)
start = u64_stats_fetch_begin_irq(&qstats->syncp);
packets = qstats->packets;
bytes = qstats->bytes;
+ xdp_drop = qstats->xdp_drop;
} while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
data[i++] = packets;
data[i++] = bytes;
+ data[i++] = xdp_drop;
}
pcpu_sum = kvmalloc_array(num_possible_cpus(),
p += ETH_GSTRING_LEN;
sprintf(p, "rx_queue_%u_bytes", i);
p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_xdp_drop", i);
+ p += ETH_GSTRING_LEN;
}
for_each_present_cpu(cpu) {
}
out:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return ret;
}
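+/* LRO and XDP are mutually exclusive: mask out LRO if userspace tries
+ * to re-enable it while an XDP program is attached.
+ */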
+static netdev_features_t netvsc_fix_features(struct net_device *ndev,
+ netdev_features_t features)
+{
+ struct net_device_context *ndevctx = netdev_priv(ndev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+
+ if (!nvdev || nvdev->destroy)
+ return features;
+
+ if ((features & NETIF_F_LRO) && netvsc_xdp_get(nvdev)) {
+ features ^= NETIF_F_LRO;
+ netdev_info(ndev, "Skip LRO - unsupported with XDP\n");
+ }
+
+ return features;
+}
+
static int netvsc_set_features(struct net_device *ndev,
netdev_features_t features)
{
.ndo_start_xmit = netvsc_start_xmit,
.ndo_change_rx_flags = netvsc_change_rx_flags,
.ndo_set_rx_mode = netvsc_set_rx_mode,
+ .ndo_fix_features = netvsc_fix_features,
.ndo_set_features = netvsc_set_features,
.ndo_change_mtu = netvsc_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = netvsc_set_mac_addr,
.ndo_select_queue = netvsc_select_queue,
.ndo_get_stats64 = netvsc_get_stats64,
+ .ndo_bpf = netvsc_bpf,
};
/*
{
struct net_device_context *net_device_ctx;
struct netvsc_device *netvsc_dev;
+ struct bpf_prog *prog;
struct net_device *ndev;
int ret;
vf_netdev->wanted_features = ndev->features;
netdev_update_features(vf_netdev);
+ prog = netvsc_xdp_get(netvsc_dev);
+ netvsc_vf_setxdp(vf_netdev, prog);
+
return NOTIFY_OK;
}
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
+ netvsc_vf_setxdp(vf_netdev, NULL);
+
netdev_rx_handler_unregister(vf_netdev);
netdev_upper_dev_unlink(vf_netdev, ndev);
RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
list_add(&net_device_ctx->list, &netvsc_dev_list);
rtnl_unlock();
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
return 0;
register_failed:
rtnl_unlock();
rndis_filter_device_remove(dev, nvdev);
rndis_failed:
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
devinfo_failed:
free_percpu(net_device_ctx->vf_stats);
no_stats:
rtnl_lock();
nvdev = rtnl_dereference(ndev_ctx->nvdev);
- if (nvdev)
+ if (nvdev) {
cancel_work_sync(&nvdev->subchan_work);
+ netvsc_xdp_set(net, NULL, NULL, nvdev);
+ }
/*
* Call to the vsc driver to let it know that the device is being
ret = netvsc_attach(net, device_info);
- rtnl_unlock();
-
- kfree(device_info);
+ netvsc_devinfo_put(device_info);
net_device_ctx->saved_netvsc_dev_info = NULL;
+ rtnl_unlock();
+
return ret;
}
static const struct hv_vmbus_device_id id_table[] = {