From 79f339125ea316e910220e5f5b4ad30370f4de85 Mon Sep 17 00:00:00 2001 From: Nimrod Andy Date: Thu, 12 Jun 2014 08:16:23 +0800 Subject: [PATCH] net: fec: Add software TSO support Add software TSO support for FEC. This feature allows to improve outbound throughput performance. Tested on imx6dl sabresd board, running iperf tcp tests shows: - 16.2% improvement comparing with FEC SG patch - 82% improvement comparing with NO SG & TSO patch $ ethtool -K eth0 tso on $ iperf -c 10.192.242.167 -t 3 & [ 3] local 10.192.242.108 port 35388 connected with 10.192.242.167 port 5001 [ ID] Interval Transfer Bandwidth [ 3] 0.0- 3.0 sec 181 MBytes 506 Mbits/sec During the testing, CPU loading is 30%. Since imx6dl FEC Bandwidth is limited to SOC system bus bandwidth, the performance with SW TSO is a milestone. CC: Ezequiel Garcia CC: Eric Dumazet CC: David Laight CC: Li Frank Signed-off-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec.h | 6 + drivers/net/ethernet/freescale/fec_main.c | 255 +++++++++++++++++++++++++++--- 2 files changed, 238 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index e7ce14d..671d080 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -299,6 +299,12 @@ struct fec_enet_private { unsigned short bufdesc_size; unsigned short tx_ring_size; unsigned short rx_ring_size; + unsigned short tx_stop_threshold; + unsigned short tx_wake_threshold; + + /* Software TSO */ + char *tso_hdrs; + dma_addr_t tso_hdrs_dma; struct platform_device *pdev; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index bea00a8..38d9d27 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -228,6 +229,15 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #define FEC_PAUSE_FLAG_AUTONEG 0x1 #define FEC_PAUSE_FLAG_ENABLE 0x2 +#define TSO_HEADER_SIZE 128 +/* Max number of allowed TCP segments for software TSO */ +#define FEC_MAX_TSO_SEGS 100 +#define FEC_MAX_SKB_DESCS (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS) + +#define IS_TSO_HEADER(txq, addr) \ + ((addr >= txq->tso_hdrs_dma) && \ + (addr < txq->tso_hdrs_dma + txq->tx_ring_size * TSO_HEADER_SIZE)) + static int mii_cnt; static inline @@ -438,8 +448,17 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev) unsigned short buflen; unsigned int estatus = 0; unsigned int index; + int entries_free; int ret; + entries_free = fec_enet_get_free_txdesc_num(fep); + if (entries_free < MAX_SKB_FRAGS + 1) { + dev_kfree_skb_any(skb); + if (net_ratelimit()) + netdev_err(ndev, "NOT enough BD for SG!\n"); + return NETDEV_TX_OK; + } + /* Protocol checksum off-load for TCP and UDP. */ if (fec_enet_clear_csum(skb, ndev)) { dev_kfree_skb_any(skb); @@ -534,35 +553,210 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev) return 0; } -static netdev_tx_t -fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) +static int +fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev, + struct bufdesc *bdp, int index, char *data, + int size, bool last_tcp, bool is_last) { struct fec_enet_private *fep = netdev_priv(ndev); - struct bufdesc *bdp; - unsigned short status; - int entries_free; - int ret; - - /* Fill in a Tx ring entry */ - bdp = fep->cur_tx; + const struct platform_device_id *id_entry = + platform_get_device_id(fep->pdev); + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + unsigned short status; + unsigned int estatus = 0; status = bdp->cbd_sc; + status &= ~BD_ENET_TX_STATS; - if (status & BD_ENET_TX_READY) { - /* Ooops. All transmit buffers are full. Bail out. - * This should not happen, since ndev->tbusy should be set. - */ + status |= (BD_ENET_TX_TC | BD_ENET_TX_READY); + bdp->cbd_datlen = size; + + if (((unsigned long) data) & FEC_ALIGNMENT || + id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) { + memcpy(fep->tx_bounce[index], data, size); + data = fep->tx_bounce[index]; + + if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) + swap_buffer(data, size); + } + + bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data, + size, DMA_TO_DEVICE); + if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) { + dev_kfree_skb_any(skb); if (net_ratelimit()) - netdev_err(ndev, "tx queue full!\n"); + netdev_err(ndev, "Tx DMA memory map failed\n"); return NETDEV_TX_BUSY; } - ret = fec_enet_txq_submit_skb(skb, ndev); + if (fep->bufdesc_ex) { + if (skb->ip_summed == CHECKSUM_PARTIAL) + estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS; + ebdp->cbd_bdu = 0; + ebdp->cbd_esc = estatus; + } + + /* Handle the last BD specially */ + if (last_tcp) + status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC); + if (is_last) { + status |= BD_ENET_TX_INTR; + if (fep->bufdesc_ex) + ebdp->cbd_esc |= BD_ENET_TX_INT; + } + + bdp->cbd_sc = status; + + return 0; +} + +static int +fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev, + struct bufdesc *bdp, int index) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + const struct platform_device_id *id_entry = + platform_get_device_id(fep->pdev); + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + void *bufaddr; + unsigned long dmabuf; + unsigned short status; + unsigned int estatus = 0; + + status = bdp->cbd_sc; + status &= ~BD_ENET_TX_STATS; + status |= (BD_ENET_TX_TC | BD_ENET_TX_READY); + + bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE; + dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE; + if (((unsigned long) bufaddr) & FEC_ALIGNMENT || + id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) { + memcpy(fep->tx_bounce[index], skb->data, hdr_len); + bufaddr = fep->tx_bounce[index]; + + if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) + swap_buffer(bufaddr, hdr_len); + + dmabuf = dma_map_single(&fep->pdev->dev, bufaddr, + hdr_len, DMA_TO_DEVICE); + if (dma_mapping_error(&fep->pdev->dev, dmabuf)) { + dev_kfree_skb_any(skb); + if (net_ratelimit()) + netdev_err(ndev, "Tx DMA memory map failed\n"); + return NETDEV_TX_BUSY; + } + } + + bdp->cbd_bufaddr = dmabuf; + bdp->cbd_datlen = hdr_len; + + if (fep->bufdesc_ex) { + if (skb->ip_summed == CHECKSUM_PARTIAL) + estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS; + ebdp->cbd_bdu = 0; + ebdp->cbd_esc = estatus; + } + + bdp->cbd_sc = status; + + return 0; +} + +static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + int total_len, data_left; + struct bufdesc *bdp = fep->cur_tx; + struct tso_t tso; + unsigned int index = 0; + int ret; + + if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) { + dev_kfree_skb_any(skb); + if (net_ratelimit()) + netdev_err(ndev, "NOT enough BD for TSO!\n"); + return NETDEV_TX_OK; + } + + /* Protocol checksum off-load for TCP and UDP. */ + if (fec_enet_clear_csum(skb, ndev)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* Initialize the TSO handler, and prepare the first payload */ + tso_start(skb, &tso); + + total_len = skb->len - hdr_len; + while (total_len > 0) { + char *hdr; + + index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep); + data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); + total_len -= data_left; + + /* prepare packet headers: MAC + IP + TCP */ + hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE; + tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0); + ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index); + if (ret) + goto err_release; + + while (data_left > 0) { + int size; + + size = min_t(int, tso.size, data_left); + bdp = fec_enet_get_nextdesc(bdp, fep); + index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep); + ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data, + size, size == data_left, + total_len == 0); + if (ret) + goto err_release; + + data_left -= size; + tso_build_data(skb, &tso, size); + } + + bdp = fec_enet_get_nextdesc(bdp, fep); + } + + /* Save skb pointer */ + fep->tx_skbuff[index] = skb; + + fec_enet_submit_work(bdp, fep); + + skb_tx_timestamp(skb); + fep->cur_tx = bdp; + + /* Trigger transmission start */ + writel(0, fep->hwp + FEC_X_DES_ACTIVE); + + return 0; + +err_release: + /* TODO: Release all used data descriptors for TSO */ + return ret; +} + +static netdev_tx_t +fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + int entries_free; + int ret; + + if (skb_is_gso(skb)) + ret = fec_enet_txq_submit_tso(skb, ndev); + else + ret = fec_enet_txq_submit_skb(skb, ndev); if (ret) return ret; entries_free = fec_enet_get_free_txdesc_num(fep); - if (entries_free < MAX_SKB_FRAGS + 1) + if (entries_free <= fep->tx_stop_threshold) netif_stop_queue(ndev); return NETDEV_TX_OK; @@ -883,7 +1077,7 @@ fec_enet_tx(struct net_device *ndev) unsigned short status; struct sk_buff *skb; int index = 0; - int entries; + int entries_free; fep = netdev_priv(ndev); bdp = fep->dirty_tx; @@ -900,8 +1094,9 @@ fec_enet_tx(struct net_device *ndev) index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep); skb = fep->tx_skbuff[index]; - dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen, - DMA_TO_DEVICE); + if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr)) + dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, + bdp->cbd_datlen, DMA_TO_DEVICE); bdp->cbd_bufaddr = 0; if (!skb) { bdp = fec_enet_get_nextdesc(bdp, fep); @@ -962,9 +1157,11 @@ fec_enet_tx(struct net_device *ndev) /* Since we have freed up a buffer, the ring is no longer full */ - entries = fec_enet_get_free_txdesc_num(fep); - if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev)) - netif_wake_queue(ndev); + if (netif_queue_stopped(ndev)) { + entries_free = fec_enet_get_free_txdesc_num(fep); + if (entries_free >= fep->tx_wake_threshold) + netif_wake_queue(ndev); + } } return; } @@ -2166,6 +2363,9 @@ static int fec_enet_init(struct net_device *ndev) fep->tx_ring_size = TX_RING_SIZE; fep->rx_ring_size = RX_RING_SIZE; + fep->tx_stop_threshold = FEC_MAX_SKB_DESCS; + fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2; + if (fep->bufdesc_ex) fep->bufdesc_size = sizeof(struct bufdesc_ex); else @@ -2179,6 +2379,13 @@ static int fec_enet_init(struct net_device *ndev) if (!cbd_base) return -ENOMEM; + fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE, + &fep->tso_hdrs_dma, GFP_KERNEL); + if (!fep->tso_hdrs) { + dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma); + return -ENOMEM; + } + memset(cbd_base, 0, PAGE_SIZE); fep->netdev = ndev; @@ -2209,9 +2416,11 @@ static int fec_enet_init(struct net_device *ndev) ndev->features |= NETIF_F_HW_VLAN_CTAG_RX; if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) { + ndev->gso_max_segs = FEC_MAX_TSO_SEGS; + /* enable hw accelerator */ ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM - | NETIF_F_RXCSUM | NETIF_F_SG); + | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO); fep->csum_flags |= FLAG_RX_CSUM_ENABLED; } -- 2.7.4