net: enetc: add support for XDP_REDIRECT
author Vladimir Oltean <vladimir.oltean@nxp.com>
Wed, 31 Mar 2021 20:08:57 +0000 (23:08 +0300)
committer David S. Miller <davem@davemloft.net>
Wed, 31 Mar 2021 21:57:44 +0000 (14:57 -0700)
The driver implementation of the XDP_REDIRECT action reuses parts from
XDP_TX, most notably the enetc_xdp_tx function which transmits an array
of TX software BDs. Only this time, the buffers don't have DMA mappings,
we need to create them.

When a BPF program reaches the XDP_REDIRECT verdict for a frame, we can
employ the same buffer reuse strategy as for the normal processing path
and for XDP_PASS: we can flip to the other page half and seed that to
the RX ring.

Note that scatter/gather support is there, but disabled due to lack of
multi-buffer support in XDP (which is added by this series):
https://patchwork.kernel.org/project/netdevbpf/cover/cover.1616179034.git.lorenzo@kernel.org/

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/freescale/enetc/enetc.c
drivers/net/ethernet/freescale/enetc/enetc.h
drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c

index ba5313a..57049ae 100644 (file)
@@ -8,6 +8,23 @@
 #include <linux/vmalloc.h>
 #include <net/pkt_sched.h>
 
+/* Return the skb attached to a TX software BD, or NULL when the BD was
+ * queued by the XDP_TX or XDP_REDIRECT paths and therefore carries no skb.
+ */
+static struct sk_buff *enetc_tx_swbd_get_skb(struct enetc_tx_swbd *tx_swbd)
+{
+       bool is_xdp = tx_swbd->is_xdp_tx || tx_swbd->is_xdp_redirect;
+
+       return is_xdp ? NULL : tx_swbd->skb;
+}
+
+/* Return the xdp_frame attached to a TX software BD, or NULL when the BD
+ * was not queued by the XDP_REDIRECT path.
+ */
+static struct xdp_frame *
+enetc_tx_swbd_get_xdp_frame(struct enetc_tx_swbd *tx_swbd)
+{
+       return tx_swbd->is_xdp_redirect ? tx_swbd->xdp_frame : NULL;
+}
+
 static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring,
                                struct enetc_tx_swbd *tx_swbd)
 {
@@ -25,14 +42,20 @@ static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring,
        tx_swbd->dma = 0;
 }
 
-static void enetc_free_tx_skb(struct enetc_bdr *tx_ring,
-                             struct enetc_tx_swbd *tx_swbd)
+static void enetc_free_tx_frame(struct enetc_bdr *tx_ring,
+                               struct enetc_tx_swbd *tx_swbd)
 {
+       struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd);
+       struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd);
+
        if (tx_swbd->dma)
                enetc_unmap_tx_buff(tx_ring, tx_swbd);
 
-       if (tx_swbd->skb) {
-               dev_kfree_skb_any(tx_swbd->skb);
+       if (xdp_frame) {
+               xdp_return_frame(tx_swbd->xdp_frame);
+               tx_swbd->xdp_frame = NULL;
+       } else if (skb) {
+               dev_kfree_skb_any(skb);
                tx_swbd->skb = NULL;
        }
 }
@@ -183,7 +206,7 @@ dma_err:
 
        do {
                tx_swbd = &tx_ring->tx_swbd[i];
-               enetc_free_tx_skb(tx_ring, tx_swbd);
+               enetc_free_tx_frame(tx_ring, tx_swbd);
                if (i == 0)
                        i = tx_ring->bd_count;
                i--;
@@ -381,6 +404,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
        do_tstamp = false;
 
        while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) {
+               struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd);
+               struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd);
+
                if (unlikely(tx_swbd->check_wb)) {
                        struct enetc_ndev_priv *priv = netdev_priv(ndev);
                        union enetc_tx_bd *txbd;
@@ -400,12 +426,15 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
                else if (likely(tx_swbd->dma))
                        enetc_unmap_tx_buff(tx_ring, tx_swbd);
 
-               if (tx_swbd->skb) {
+               if (xdp_frame) {
+                       xdp_return_frame(xdp_frame);
+                       tx_swbd->xdp_frame = NULL;
+               } else if (skb) {
                        if (unlikely(do_tstamp)) {
-                               enetc_tstamp_tx(tx_swbd->skb, tstamp);
+                               enetc_tstamp_tx(skb, tstamp);
                                do_tstamp = false;
                        }
-                       napi_consume_skb(tx_swbd->skb, napi_budget);
+                       napi_consume_skb(skb, napi_budget);
                        tx_swbd->skb = NULL;
                }
 
@@ -827,6 +856,109 @@ static bool enetc_xdp_tx(struct enetc_bdr *tx_ring,
        return true;
 }
 
+/* Describe one redirected XDP frame as an array of TX software BDs.
+ *
+ * The linear part of @xdp_frame and then each of its fragments are DMA-mapped
+ * towards the device and recorded, one per entry, in @xdp_tx_arr. Only the
+ * last entry is marked end-of-frame and holds the xdp_frame pointer; all the
+ * earlier entries have it set to NULL.
+ *
+ * Returns the number of entries filled in, or -1 if a DMA mapping failed. On
+ * failure every mapping created by this call has already been undone.
+ */
+static int enetc_xdp_frame_to_xdp_tx_swbd(struct enetc_bdr *tx_ring,
+                                         struct enetc_tx_swbd *xdp_tx_arr,
+                                         struct xdp_frame *xdp_frame)
+{
+       struct enetc_tx_swbd *xdp_tx_swbd = &xdp_tx_arr[0];
+       struct skb_shared_info *shinfo;
+       void *data = xdp_frame->data;
+       int len = xdp_frame->len;
+       skb_frag_t *frag;
+       dma_addr_t dma;
+       unsigned int f;
+       int n = 0;
+
+       /* First entry: the linear part of the frame */
+       dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE);
+       if (unlikely(dma_mapping_error(tx_ring->dev, dma))) {
+               netdev_err(tx_ring->ndev, "DMA map error\n");
+               return -1;
+       }
+
+       xdp_tx_swbd->dma = dma;
+       xdp_tx_swbd->dir = DMA_TO_DEVICE;
+       xdp_tx_swbd->len = len;
+       xdp_tx_swbd->is_xdp_redirect = true;
+       xdp_tx_swbd->is_eof = false;
+       xdp_tx_swbd->xdp_frame = NULL;
+
+       n++;
+       xdp_tx_swbd = &xdp_tx_arr[n];
+
+       shinfo = xdp_get_shared_info_from_frame(xdp_frame);
+
+       /* Following entries: one per fragment */
+       for (f = 0, frag = &shinfo->frags[0]; f < shinfo->nr_frags;
+            f++, frag++) {
+               data = skb_frag_address(frag);
+               len = skb_frag_size(frag);
+
+               dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE);
+               if (unlikely(dma_mapping_error(tx_ring->dev, dma))) {
+                       /* Undo the DMA mappings of entries [0, n). The
+                        * pre-decrement makes the walk start at the last
+                        * mapped entry and stop after index 0, so it never
+                        * touches xdp_tx_arr[-1].
+                        */
+                       while (--n >= 0)
+                               enetc_unmap_tx_buff(tx_ring, &xdp_tx_arr[n]);
+
+                       netdev_err(tx_ring->ndev, "DMA map error\n");
+                       return -1;
+               }
+
+               xdp_tx_swbd->dma = dma;
+               xdp_tx_swbd->dir = DMA_TO_DEVICE;
+               xdp_tx_swbd->len = len;
+               xdp_tx_swbd->is_xdp_redirect = true;
+               xdp_tx_swbd->is_eof = false;
+               xdp_tx_swbd->xdp_frame = NULL;
+
+               n++;
+               xdp_tx_swbd = &xdp_tx_arr[n];
+       }
+
+       /* Only the final BD of the frame carries the xdp_frame pointer */
+       xdp_tx_arr[n - 1].is_eof = true;
+       xdp_tx_arr[n - 1].xdp_frame = xdp_frame;
+
+       return n;
+}
+
+/* ndo_xdp_xmit handler: enqueue redirected XDP frames for transmission.
+ *
+ * Each of the @num_frames entries of @frames is DMA-mapped and placed on the
+ * TX ring selected by the current CPU. Processing stops at the first frame
+ * that cannot be mapped or enqueued; that frame's mappings are released.
+ *
+ * Returns the number of frames that were enqueued, which may be smaller
+ * than @num_frames.
+ */
+int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
+                  struct xdp_frame **frames, u32 flags)
+{
+       struct enetc_tx_swbd xdp_redirect_arr[ENETC_MAX_SKB_FRAGS] = {0};
+       struct enetc_ndev_priv *priv = netdev_priv(ndev);
+       struct enetc_bdr *tx_ring;
+       int xdp_tx_bd_cnt, i, k;
+       int xdp_tx_frm_cnt = 0;
+
+       tx_ring = priv->tx_ring[smp_processor_id()];
+
+       prefetchw(ENETC_TXBD(*tx_ring, tx_ring->next_to_use));
+
+       for (k = 0; k < num_frames; k++) {
+               xdp_tx_bd_cnt = enetc_xdp_frame_to_xdp_tx_swbd(tx_ring,
+                                                              xdp_redirect_arr,
+                                                              frames[k]);
+               if (unlikely(xdp_tx_bd_cnt < 0))
+                       break;
+
+               if (unlikely(!enetc_xdp_tx(tx_ring, xdp_redirect_arr,
+                                          xdp_tx_bd_cnt))) {
+                       /* The frame was not enqueued: drop its mappings */
+                       for (i = 0; i < xdp_tx_bd_cnt; i++)
+                               enetc_unmap_tx_buff(tx_ring,
+                                                   &xdp_redirect_arr[i]);
+                       tx_ring->stats.xdp_tx_drops++;
+                       break;
+               }
+
+               xdp_tx_frm_cnt++;
+       }
+
+       /* Update the ring tail when the caller asked for a flush, or when we
+        * stopped before the end of the batch. The loop index always equals
+        * the number of enqueued frames (both are left untouched on break),
+        * so the early exit has to be detected against num_frames.
+        */
+       if (unlikely((flags & XDP_XMIT_FLUSH) || xdp_tx_frm_cnt != num_frames))
+               enetc_update_tx_ring_tail(tx_ring);
+
+       tx_ring->stats.xdp_tx += xdp_tx_frm_cnt;
+
+       return xdp_tx_frm_cnt;
+}
+
 static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
                                     struct xdp_buff *xdp_buff, u16 size)
 {
@@ -948,14 +1080,31 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
        rx_ring->stats.xdp_drops++;
 }
 
+/* Release the RX buffers in the ring range [rx_ring_first, rx_ring_last):
+ * every software BD that still holds a page has it DMA-unmapped and freed.
+ * One redirect failure is accounted per call.
+ */
+static void enetc_xdp_free(struct enetc_bdr *rx_ring, int rx_ring_first,
+                          int rx_ring_last)
+{
+       int idx;
+
+       for (idx = rx_ring_first; idx != rx_ring_last;
+            enetc_bdr_idx_inc(rx_ring, &idx)) {
+               struct enetc_rx_swbd *swbd = &rx_ring->rx_swbd[idx];
+
+               /* Nothing to release for this entry */
+               if (!swbd->page)
+                       continue;
+
+               dma_unmap_page(rx_ring->dev, swbd->dma, PAGE_SIZE,
+                              swbd->dir);
+               __free_page(swbd->page);
+               swbd->page = NULL;
+       }
+
+       rx_ring->stats.xdp_redirect_failures++;
+}
+
 static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
                                   struct napi_struct *napi, int work_limit,
                                   struct bpf_prog *prog)
 {
+       int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0, xdp_redirect_frm_cnt = 0;
        struct enetc_tx_swbd xdp_tx_arr[ENETC_MAX_SKB_FRAGS] = {0};
        struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
        struct enetc_bdr *tx_ring = priv->tx_ring[rx_ring->index];
-       int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0;
        int rx_frm_cnt = 0, rx_byte_cnt = 0;
        int cleaned_cnt, i;
        u32 xdp_act;
@@ -969,6 +1118,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
                int orig_i, orig_cleaned_cnt;
                struct xdp_buff xdp_buff;
                struct sk_buff *skb;
+               int tmp_orig_i, err;
                u32 bd_status;
 
                rxbd = enetc_rxbd(rx_ring, i);
@@ -1027,6 +1177,43 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
                                xdp_tx_frm_cnt++;
                        }
                        break;
+               case XDP_REDIRECT:
+                       /* xdp_return_frame does not support S/G in the sense
+                        * that it leaks the fragments (__xdp_return should not
+                        * call page_frag_free only for the initial buffer).
+                        * Until XDP_REDIRECT gains support for S/G let's keep
+                        * the code structure in place, but dead. We drop the
+                        * S/G frames ourselves to avoid memory leaks which
+                        * would otherwise leave the kernel OOM.
+                        */
+                       if (unlikely(cleaned_cnt - orig_cleaned_cnt != 1)) {
+                               enetc_xdp_drop(rx_ring, orig_i, i);
+                               rx_ring->stats.xdp_redirect_sg++;
+                               break;
+                       }
+
+                       tmp_orig_i = orig_i;
+
+                       while (orig_i != i) {
+                               enetc_put_rx_buff(rx_ring,
+                                                 &rx_ring->rx_swbd[orig_i]);
+                               enetc_bdr_idx_inc(rx_ring, &orig_i);
+                       }
+
+                       err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog);
+                       if (unlikely(err)) {
+                               enetc_xdp_free(rx_ring, tmp_orig_i, i);
+                       } else {
+                               xdp_redirect_frm_cnt++;
+                               rx_ring->stats.xdp_redirect++;
+                       }
+
+                       if (unlikely(xdp_redirect_frm_cnt > ENETC_DEFAULT_TX_WORK)) {
+                               xdp_do_flush_map();
+                               xdp_redirect_frm_cnt = 0;
+                       }
+
+                       break;
                default:
                        bpf_warn_invalid_xdp_action(xdp_act);
                }
@@ -1039,6 +1226,9 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
        rx_ring->stats.packets += rx_frm_cnt;
        rx_ring->stats.bytes += rx_byte_cnt;
 
+       if (xdp_redirect_frm_cnt)
+               xdp_do_flush_map();
+
        if (xdp_tx_frm_cnt)
                enetc_update_tx_ring_tail(tx_ring);
 
@@ -1173,7 +1363,7 @@ static void enetc_free_txbdr(struct enetc_bdr *txr)
        int size, i;
 
        for (i = 0; i < txr->bd_count; i++)
-               enetc_free_tx_skb(txr, &txr->tx_swbd[i]);
+               enetc_free_tx_frame(txr, &txr->tx_swbd[i]);
 
        size = txr->bd_count * sizeof(union enetc_tx_bd);
 
@@ -1290,7 +1480,7 @@ static void enetc_free_tx_ring(struct enetc_bdr *tx_ring)
        for (i = 0; i < tx_ring->bd_count; i++) {
                struct enetc_tx_swbd *tx_swbd = &tx_ring->tx_swbd[i];
 
-               enetc_free_tx_skb(tx_ring, tx_swbd);
+               enetc_free_tx_frame(tx_ring, tx_swbd);
        }
 
        tx_ring->next_to_clean = 0;
index d0619fc..05474f4 100644 (file)
                                (ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN))
 
 struct enetc_tx_swbd {
-       struct sk_buff *skb;
+       union {
+               struct sk_buff *skb;
+               struct xdp_frame *xdp_frame;
+       };
        dma_addr_t dma;
        struct page *page;      /* valid only if is_xdp_tx */
        u16 page_offset;        /* valid only if is_xdp_tx */
@@ -30,6 +33,7 @@ struct enetc_tx_swbd {
        u8 do_tstamp:1;
        u8 is_eof:1;
        u8 is_xdp_tx:1;
+       u8 is_xdp_redirect:1;
 };
 
 #define ENETC_RX_MAXFRM_SIZE   ENETC_MAC_MAXFRM_SIZE
@@ -61,6 +65,9 @@ struct enetc_ring_stats {
        unsigned int xdp_drops;
        unsigned int xdp_tx;
        unsigned int xdp_tx_drops;
+       unsigned int xdp_redirect;
+       unsigned int xdp_redirect_failures;
+       unsigned int xdp_redirect_sg;
        unsigned int recycles;
        unsigned int recycle_failures;
 };
@@ -354,6 +361,8 @@ int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
 int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
                   void *type_data);
 int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp);
+int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
+                  struct xdp_frame **frames, u32 flags);
 
 /* ethtool */
 void enetc_set_ethtool_ops(struct net_device *ndev);
index 37821a8..7cc81b4 100644 (file)
@@ -195,6 +195,9 @@ static const char rx_ring_stats[][ETH_GSTRING_LEN] = {
        "Rx ring %2d XDP drops",
        "Rx ring %2d recycles",
        "Rx ring %2d recycle failures",
+       "Rx ring %2d redirects",
+       "Rx ring %2d redirect failures",
+       "Rx ring %2d redirect S/G",
 };
 
 static const char tx_ring_stats[][ETH_GSTRING_LEN] = {
@@ -284,6 +287,9 @@ static void enetc_get_ethtool_stats(struct net_device *ndev,
                data[o++] = priv->rx_ring[i]->stats.xdp_drops;
                data[o++] = priv->rx_ring[i]->stats.recycles;
                data[o++] = priv->rx_ring[i]->stats.recycle_failures;
+               data[o++] = priv->rx_ring[i]->stats.xdp_redirect;
+               data[o++] = priv->rx_ring[i]->stats.xdp_redirect_failures;
+               data[o++] = priv->rx_ring[i]->stats.xdp_redirect_sg;
        }
 
        if (!enetc_si_is_pf(priv->si))
index 0484dbe..f61fedf 100644 (file)
@@ -708,6 +708,7 @@ static const struct net_device_ops enetc_ndev_ops = {
        .ndo_do_ioctl           = enetc_ioctl,
        .ndo_setup_tc           = enetc_setup_tc,
        .ndo_bpf                = enetc_setup_bpf,
+       .ndo_xdp_xmit           = enetc_xdp_xmit,
 };
 
 static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,