net: mvpp2: software tso support
authorAntoine Ténart <antoine.tenart@free-electrons.com>
Wed, 23 Aug 2017 07:46:56 +0000 (09:46 +0200)
committerDavid S. Miller <davem@davemloft.net>
Thu, 24 Aug 2017 03:42:10 +0000 (20:42 -0700)
The patch uses the tso API to implement the tso functionality in Marvell
PPv2 driver.

Using iperf and 10G ports, using TSO shows a significant performance
improvement by a factor 2 to reach around 9.5Gbps in TX; as well as a
significant CPU usage drop (from 25% to 15%).

Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/marvell/mvpp2.c

index eee8788..7fa251b 100644 (file)
@@ -35,6 +35,7 @@
 #include <uapi/linux/ppp_defs.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/tso.h>
 
 /* RX Fifo Registers */
 #define MVPP2_RX_DATA_FIFO_SIZE_REG(port)      (0x00 + 4 * (port))
@@ -1010,6 +1011,10 @@ struct mvpp2_txq_pcpu {
 
        /* Index of the TX DMA descriptor to be cleaned up */
        int txq_get_index;
+
+       /* DMA buffer for TSO headers */
+       char *tso_headers;
+       dma_addr_t tso_headers_dma;
 };
 
 struct mvpp2_tx_queue {
@@ -5494,6 +5499,14 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
                txq_pcpu->reserved_num = 0;
                txq_pcpu->txq_put_index = 0;
                txq_pcpu->txq_get_index = 0;
+
+               txq_pcpu->tso_headers =
+                       dma_alloc_coherent(port->dev->dev.parent,
+                                          MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
+                                          &txq_pcpu->tso_headers_dma,
+                                          GFP_KERNEL);
+               if (!txq_pcpu->tso_headers)
+                       goto cleanup;
        }
 
        return 0;
@@ -5501,6 +5514,11 @@ cleanup:
        for_each_present_cpu(cpu) {
                txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
                kfree(txq_pcpu->buffs);
+
+               dma_free_coherent(port->dev->dev.parent,
+                                 MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
+                                 txq_pcpu->tso_headers,
+                                 txq_pcpu->tso_headers_dma);
        }
 
        dma_free_coherent(port->dev->dev.parent,
@@ -5520,6 +5538,11 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
        for_each_present_cpu(cpu) {
                txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
                kfree(txq_pcpu->buffs);
+
+               dma_free_coherent(port->dev->dev.parent,
+                                 MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
+                                 txq_pcpu->tso_headers,
+                                 txq_pcpu->tso_headers_dma);
        }
 
        if (txq->descs)
@@ -6049,6 +6072,123 @@ cleanup:
        return -ENOMEM;
 }
 
+static inline void mvpp2_tso_put_hdr(struct sk_buff *skb,
+                                    struct net_device *dev,
+                                    struct mvpp2_tx_queue *txq,
+                                    struct mvpp2_tx_queue *aggr_txq,
+                                    struct mvpp2_txq_pcpu *txq_pcpu,
+                                    int hdr_sz)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
+       dma_addr_t addr;
+
+       mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
+       mvpp2_txdesc_size_set(port, tx_desc, hdr_sz);
+
+       addr = txq_pcpu->tso_headers_dma +
+              txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
+       mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN);
+       mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN);
+
+       mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) |
+                                           MVPP2_TXD_F_DESC |
+                                           MVPP2_TXD_PADDING_DISABLE);
+       mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
+}
+
+static inline int mvpp2_tso_put_data(struct sk_buff *skb,
+                                    struct net_device *dev, struct tso_t *tso,
+                                    struct mvpp2_tx_queue *txq,
+                                    struct mvpp2_tx_queue *aggr_txq,
+                                    struct mvpp2_txq_pcpu *txq_pcpu,
+                                    int sz, bool left, bool last)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
+       dma_addr_t buf_dma_addr;
+
+       mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
+       mvpp2_txdesc_size_set(port, tx_desc, sz);
+
+       buf_dma_addr = dma_map_single(dev->dev.parent, tso->data, sz,
+                                     DMA_TO_DEVICE);
+       if (unlikely(dma_mapping_error(dev->dev.parent, buf_dma_addr))) {
+               mvpp2_txq_desc_put(txq);
+               return -ENOMEM;
+       }
+
+       mvpp2_txdesc_offset_set(port, tx_desc,
+                               buf_dma_addr & MVPP2_TX_DESC_ALIGN);
+       mvpp2_txdesc_dma_addr_set(port, tx_desc,
+                                 buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);
+
+       if (!left) {
+               mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC);
+               if (last) {
+                       mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
+                       return 0;
+               }
+       } else {
+               mvpp2_txdesc_cmd_set(port, tx_desc, 0);
+       }
+
+       mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
+       return 0;
+}
+
+static int mvpp2_tx_tso(struct sk_buff *skb, struct net_device *dev,
+                       struct mvpp2_tx_queue *txq,
+                       struct mvpp2_tx_queue *aggr_txq,
+                       struct mvpp2_txq_pcpu *txq_pcpu)
+{
+       struct mvpp2_port *port = netdev_priv(dev);
+       struct tso_t tso;
+       int hdr_sz = skb_transport_offset(skb) + tcp_hdrlen(skb);
+       int i, len, descs = 0;
+
+       /* Check number of available descriptors */
+       if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq,
+                                     tso_count_descs(skb)) ||
+           mvpp2_txq_reserved_desc_num_proc(port->priv, txq, txq_pcpu,
+                                            tso_count_descs(skb)))
+               return 0;
+
+       tso_start(skb, &tso);
+       len = skb->len - hdr_sz;
+       while (len > 0) {
+               int left = min_t(int, skb_shinfo(skb)->gso_size, len);
+               char *hdr = txq_pcpu->tso_headers +
+                           txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
+
+               len -= left;
+               descs++;
+
+               tso_build_hdr(skb, hdr, &tso, left, len == 0);
+               mvpp2_tso_put_hdr(skb, dev, txq, aggr_txq, txq_pcpu, hdr_sz);
+
+               while (left > 0) {
+                       int sz = min_t(int, tso.size, left);
+                       left -= sz;
+                       descs++;
+
+                       if (mvpp2_tso_put_data(skb, dev, &tso, txq, aggr_txq,
+                                              txq_pcpu, sz, left, len == 0))
+                               goto release;
+                       tso_build_data(skb, &tso, sz);
+               }
+       }
+
+       return descs;
+
+release:
+       for (i = descs - 1; i >= 0; i--) {
+               struct mvpp2_tx_desc *tx_desc = txq->descs + i;
+               tx_desc_unmap_put(port, txq, tx_desc);
+       }
+       return 0;
+}
+
 /* Main tx processing */
 static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
 {
@@ -6066,6 +6206,10 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
        txq_pcpu = this_cpu_ptr(txq->pcpu);
        aggr_txq = &port->priv->aggr_txqs[smp_processor_id()];
 
+       if (skb_is_gso(skb)) {
+               frags = mvpp2_tx_tso(skb, dev, txq, aggr_txq, txq_pcpu);
+               goto out;
+       }
        frags = skb_shinfo(skb)->nr_frags + 1;
 
        /* Check number of available descriptors */
@@ -6115,22 +6259,21 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
                }
        }
 
-       txq_pcpu->reserved_num -= frags;
-       txq_pcpu->count += frags;
-       aggr_txq->count += frags;
-
-       /* Enable transmit */
-       wmb();
-       mvpp2_aggr_txq_pend_desc_add(port, frags);
-
-       if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) {
-               struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
-
-               netif_tx_stop_queue(nq);
-       }
 out:
        if (frags > 0) {
                struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats);
+               struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
+
+               txq_pcpu->reserved_num -= frags;
+               txq_pcpu->count += frags;
+               aggr_txq->count += frags;
+
+               /* Enable transmit */
+               wmb();
+               mvpp2_aggr_txq_pend_desc_add(port, frags);
+
+               if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1)
+                       netif_tx_stop_queue(nq);
 
                u64_stats_update_begin(&stats->syncp);
                stats->tx_packets++;
@@ -7255,7 +7398,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
                }
        }
 
-       features = NETIF_F_SG | NETIF_F_IP_CSUM;
+       features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
        dev->features = features | NETIF_F_RXCSUM;
        dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
        dev->vlan_features |= features;