From 76f31e8b0911e620ac9191c8d3775cc91ed65c4c Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Mon, 13 Jul 2015 16:22:03 +0300 Subject: [PATCH] gianfar: Bundle Rx allocation, cleanup Use a more common consumer/ producer index design to improve rx buffer allocation. Instead of allocating a single new buffer (skb) on each iteration, bundle the allocation of several rx buffers at a time. This also opens the path for further memory optimizations. Remove useless check of rxq->rfbptr, since this patch touches rx pause frame handling code as well. rxq->rfbptr is always initialized as part of Rx BD ring init. Remove redundant (and misleading) 'amount_pull' parameter. Signed-off-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 201 +++++++++--------- drivers/net/ethernet/freescale/gianfar.h | 39 +++- .../net/ethernet/freescale/gianfar_ethtool.c | 3 + 3 files changed, 136 insertions(+), 107 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index ff875028fdff..b35bf3de44e0 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -116,8 +116,8 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev); static void gfar_reset_task(struct work_struct *work); static void gfar_timeout(struct net_device *dev); static int gfar_close(struct net_device *dev); -static struct sk_buff *gfar_new_skb(struct net_device *dev, - dma_addr_t *bufaddr); +static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue, + int alloc_cnt); static int gfar_set_mac_address(struct net_device *dev); static int gfar_change_mtu(struct net_device *dev, int new_mtu); static irqreturn_t gfar_error(int irq, void *dev_id); @@ -142,7 +142,7 @@ static void gfar_netpoll(struct net_device *dev); int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit); static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue); static void gfar_process_frame(struct net_device *dev, struct sk_buff *skb, - int amount_pull, struct napi_struct *napi); + struct napi_struct *napi); static void gfar_halt_nodisable(struct gfar_private *priv); static void gfar_clear_exact_match(struct net_device *dev); static void gfar_set_mac_for_addr(struct net_device *dev, int num, @@ -169,17 +169,15 @@ static void gfar_init_rxbdp(struct gfar_priv_rx_q *rx_queue, struct rxbd8 *bdp, bdp->lstatus = cpu_to_be32(lstatus); } -static int gfar_init_bds(struct net_device *ndev) +static void gfar_init_bds(struct net_device *ndev) { struct gfar_private *priv = netdev_priv(ndev); struct gfar __iomem *regs = priv->gfargrp[0].regs; struct gfar_priv_tx_q *tx_queue = NULL; struct gfar_priv_rx_q *rx_queue = NULL; struct txbd8 *txbdp; - struct rxbd8 *rxbdp; u32 __iomem *rfbptr; int i, j; - dma_addr_t bufaddr; for (i = 0; i < priv->num_tx_queues; i++) { tx_queue = priv->tx_queue[i]; @@ -207,33 +205,18 @@ static int gfar_init_bds(struct net_device *ndev) rfbptr = ®s->rfbptr0; for (i = 0; i < priv->num_rx_queues; i++) { rx_queue = priv->rx_queue[i]; - rx_queue->cur_rx = rx_queue->rx_bd_base; - rx_queue->skb_currx = 0; - rxbdp = rx_queue->rx_bd_base; - - for (j = 0; j < rx_queue->rx_ring_size; j++) { - struct sk_buff *skb = rx_queue->rx_skbuff[j]; - if (skb) { - bufaddr = be32_to_cpu(rxbdp->bufPtr); - } else { - skb = gfar_new_skb(ndev, &bufaddr); - if (!skb) { - netdev_err(ndev, "Can't allocate RX buffers\n"); - return -ENOMEM; - } - rx_queue->rx_skbuff[j] = skb; - } + rx_queue->next_to_clean = 0; + rx_queue->next_to_use = 0; - gfar_init_rxbdp(rx_queue, rxbdp, bufaddr); - rxbdp++; - } + /* make sure next_to_clean != next_to_use after this + * by leaving at least 1 unused descriptor + */ + gfar_alloc_rx_buffs(rx_queue, gfar_rxbd_unused(rx_queue)); rx_queue->rfbptr = rfbptr; rfbptr += 2; } - - return 0; } static int gfar_alloc_skb_resources(struct net_device *ndev) @@ -311,8 +294,7 @@ static int gfar_alloc_skb_resources(struct net_device *ndev) rx_queue->rx_skbuff[j] = NULL; } - if (gfar_init_bds(ndev)) - goto cleanup; + gfar_init_bds(ndev); return 0; @@ -1639,10 +1621,7 @@ static int gfar_restore(struct device *dev) return 0; } - if (gfar_init_bds(ndev)) { - free_skb_resources(priv); - return -ENOMEM; - } + gfar_init_bds(ndev); gfar_mac_reset(priv); @@ -2704,30 +2683,19 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) netdev_tx_completed_queue(txq, howmany, bytes_sent); } -static struct sk_buff *gfar_alloc_skb(struct net_device *dev) +static struct sk_buff *gfar_new_skb(struct net_device *ndev, + dma_addr_t *bufaddr) { - struct gfar_private *priv = netdev_priv(dev); + struct gfar_private *priv = netdev_priv(ndev); struct sk_buff *skb; + dma_addr_t addr; - skb = netdev_alloc_skb(dev, priv->rx_buffer_size + RXBUF_ALIGNMENT); + skb = netdev_alloc_skb(ndev, priv->rx_buffer_size + RXBUF_ALIGNMENT); if (!skb) return NULL; gfar_align_skb(skb); - return skb; -} - -static struct sk_buff *gfar_new_skb(struct net_device *dev, dma_addr_t *bufaddr) -{ - struct gfar_private *priv = netdev_priv(dev); - struct sk_buff *skb; - dma_addr_t addr; - - skb = gfar_alloc_skb(dev); - if (!skb) - return NULL; - addr = dma_map_single(priv->dev, skb->data, priv->rx_buffer_size, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(priv->dev, addr))) { @@ -2739,6 +2707,55 @@ static struct sk_buff *gfar_new_skb(struct net_device *dev, dma_addr_t *bufaddr) return skb; } +static void gfar_rx_alloc_err(struct gfar_priv_rx_q *rx_queue) +{ + struct gfar_private *priv = netdev_priv(rx_queue->dev); + struct gfar_extra_stats *estats = &priv->extra_stats; + + netdev_err(rx_queue->dev, "Can't alloc RX buffers\n"); + atomic64_inc(&estats->rx_alloc_err); +} + +static void gfar_alloc_rx_buffs(struct gfar_priv_rx_q *rx_queue, + int alloc_cnt) +{ + struct net_device *ndev = rx_queue->dev; + struct rxbd8 *bdp, *base; + dma_addr_t bufaddr; + int i; + + i = rx_queue->next_to_use; + base = rx_queue->rx_bd_base; + bdp = &rx_queue->rx_bd_base[i]; + + while (alloc_cnt--) { + struct sk_buff *skb = rx_queue->rx_skbuff[i]; + + if (likely(!skb)) { + skb = gfar_new_skb(ndev, &bufaddr); + if (unlikely(!skb)) { + gfar_rx_alloc_err(rx_queue); + break; + } + } else { /* restore from sleep state */ + bufaddr = be32_to_cpu(bdp->bufPtr); + } + + rx_queue->rx_skbuff[i] = skb; + + /* Setup the new RxBD */ + gfar_init_rxbdp(rx_queue, bdp, bufaddr); + + /* Update to the next pointer */ + bdp = next_bd(bdp, base, rx_queue->rx_ring_size); + + if (unlikely(++i == rx_queue->rx_ring_size)) + i = 0; + } + + rx_queue->next_to_use = i; +} + static inline void count_errors(unsigned short status, struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); @@ -2838,7 +2855,7 @@ static inline void gfar_rx_checksum(struct sk_buff *skb, struct rxfcb *fcb) /* gfar_process_frame() -- handle one incoming packet if skb isn't NULL. */ static void gfar_process_frame(struct net_device *dev, struct sk_buff *skb, - int amount_pull, struct napi_struct *napi) + struct napi_struct *napi) { struct gfar_private *priv = netdev_priv(dev); struct rxfcb *fcb = NULL; @@ -2849,9 +2866,9 @@ static void gfar_process_frame(struct net_device *dev, struct sk_buff *skb, /* Remove the FCB from the skb * Remove the padded bytes, if there are any */ - if (amount_pull) { + if (priv->uses_rxfcb) { skb_record_rx_queue(skb, fcb->rq); - skb_pull(skb, amount_pull); + skb_pull(skb, GMAC_FCB_LEN); } /* Get receive timestamp from the skb */ @@ -2895,27 +2912,30 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit) struct net_device *dev = rx_queue->dev; struct rxbd8 *bdp, *base; struct sk_buff *skb; - int pkt_len; - int amount_pull; - int howmany = 0; + int i, howmany = 0; + int cleaned_cnt = gfar_rxbd_unused(rx_queue); struct gfar_private *priv = netdev_priv(dev); /* Get the first full descriptor */ - bdp = rx_queue->cur_rx; base = rx_queue->rx_bd_base; + i = rx_queue->next_to_clean; - amount_pull = priv->uses_rxfcb ? GMAC_FCB_LEN : 0; + while (rx_work_limit--) { - while (!(be16_to_cpu(bdp->status) & RXBD_EMPTY) && rx_work_limit--) { - struct sk_buff *newskb; - dma_addr_t bufaddr; + if (cleaned_cnt >= GFAR_RX_BUFF_ALLOC) { + gfar_alloc_rx_buffs(rx_queue, cleaned_cnt); + cleaned_cnt = 0; + } - rmb(); + bdp = &rx_queue->rx_bd_base[i]; + if (be16_to_cpu(bdp->status) & RXBD_EMPTY) + break; - /* Add another skb for the future */ - newskb = gfar_new_skb(dev, &bufaddr); + /* order rx buffer descriptor reads */ + rmb(); - skb = rx_queue->rx_skbuff[rx_queue->skb_currx]; + /* fetch next to clean buffer from the ring */ + skb = rx_queue->rx_skbuff[i]; dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr), priv->rx_buffer_size, DMA_FROM_DEVICE); @@ -2924,30 +2944,26 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit) be16_to_cpu(bdp->length) > priv->rx_buffer_size)) bdp->status = cpu_to_be16(RXBD_LARGE); - /* We drop the frame if we failed to allocate a new buffer */ - if (unlikely(!newskb || - !(be16_to_cpu(bdp->status) & RXBD_LAST) || + if (unlikely(!(be16_to_cpu(bdp->status) & RXBD_LAST) || be16_to_cpu(bdp->status) & RXBD_ERR)) { count_errors(be16_to_cpu(bdp->status), dev); - if (unlikely(!newskb)) { - newskb = skb; - bufaddr = be32_to_cpu(bdp->bufPtr); - } else if (skb) - dev_kfree_skb(skb); + /* discard faulty buffer */ + dev_kfree_skb(skb); + } else { /* Increment the number of packets */ rx_queue->stats.rx_packets++; howmany++; if (likely(skb)) { - pkt_len = be16_to_cpu(bdp->length) - + int pkt_len = be16_to_cpu(bdp->length) - ETH_FCS_LEN; /* Remove the FCS from the packet length */ skb_put(skb, pkt_len); rx_queue->stats.rx_bytes += pkt_len; skb_record_rx_queue(skb, rx_queue->qindex); - gfar_process_frame(dev, skb, amount_pull, + gfar_process_frame(dev, skb, &rx_queue->grp->napi_rx); } else { @@ -2958,26 +2974,23 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit) } - rx_queue->rx_skbuff[rx_queue->skb_currx] = newskb; - - /* Setup the new bdp */ - gfar_init_rxbdp(rx_queue, bdp, bufaddr); + rx_queue->rx_skbuff[i] = NULL; + cleaned_cnt++; + if (unlikely(++i == rx_queue->rx_ring_size)) + i = 0; + } - /* Update Last Free RxBD pointer for LFC */ - if (unlikely(rx_queue->rfbptr && priv->tx_actual_en)) - gfar_write(rx_queue->rfbptr, (u32)bdp); + rx_queue->next_to_clean = i; - /* Update to the next pointer */ - bdp = next_bd(bdp, base, rx_queue->rx_ring_size); + if (cleaned_cnt) + gfar_alloc_rx_buffs(rx_queue, cleaned_cnt); - /* update to point at the next skb */ - rx_queue->skb_currx = (rx_queue->skb_currx + 1) & - RX_RING_MOD_MASK(rx_queue->rx_ring_size); + /* Update Last Free RxBD pointer for LFC */ + if (unlikely(priv->tx_actual_en)) { + bdp = gfar_rxbd_lastfree(rx_queue); + gfar_write(rx_queue->rfbptr, (u32)bdp); } - /* Update the current rxbd pointer to be the next one */ - rx_queue->cur_rx = bdp; - return howmany; } @@ -3552,14 +3565,8 @@ static noinline void gfar_update_link_state(struct gfar_private *priv) if ((tempval1 & MACCFG1_TX_FLOW) && !tx_flow_oldval) { for (i = 0; i < priv->num_rx_queues; i++) { rx_queue = priv->rx_queue[i]; - bdp = rx_queue->cur_rx; - /* skip to previous bd */ - bdp = skip_bd(bdp, rx_queue->rx_ring_size - 1, - rx_queue->rx_bd_base, - rx_queue->rx_ring_size); - - if (rx_queue->rfbptr) - gfar_write(rx_queue->rfbptr, (u32)bdp); + bdp = gfar_rxbd_lastfree(rx_queue); + gfar_write(rx_queue->rfbptr, (u32)bdp); } priv->tx_actual_en = 1; diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index daa1d37de642..cadb068cb37f 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -92,6 +92,8 @@ extern const char gfar_driver_version[]; #define DEFAULT_TX_RING_SIZE 256 #define DEFAULT_RX_RING_SIZE 256 +#define GFAR_RX_BUFF_ALLOC 16 + #define GFAR_RX_MAX_RING_SIZE 256 #define GFAR_TX_MAX_RING_SIZE 256 @@ -640,6 +642,7 @@ struct rmon_mib }; struct gfar_extra_stats { + atomic64_t rx_alloc_err; atomic64_t rx_large; atomic64_t rx_short; atomic64_t rx_nonoctet; @@ -1015,9 +1018,9 @@ struct rx_q_stats { /** * struct gfar_priv_rx_q - per rx queue structure * @rx_skbuff: skb pointers - * @skb_currx: currently use skb pointer * @rx_bd_base: First rx buffer descriptor - * @cur_rx: Next free rx ring entry + * @next_to_use: index of the next buffer to be alloc'd + * @next_to_clean: index of the next buffer to be cleaned * @qindex: index of this queue * @dev: back pointer to the dev structure * @rx_ring_size: Rx ring size @@ -1027,19 +1030,18 @@ struct rx_q_stats { struct gfar_priv_rx_q { struct sk_buff **rx_skbuff __aligned(SMP_CACHE_BYTES); - dma_addr_t rx_bd_dma_base; struct rxbd8 *rx_bd_base; - struct rxbd8 *cur_rx; struct net_device *dev; - struct gfar_priv_grp *grp; + struct gfar_priv_grp *grp; + u16 rx_ring_size; + u16 qindex; + u16 next_to_clean; + u16 next_to_use; struct rx_q_stats stats; - u16 skb_currx; - u16 qindex; - unsigned int rx_ring_size; - /* RX Coalescing values */ + u32 __iomem *rfbptr; unsigned char rxcoalescing; unsigned long rxic; - u32 __iomem *rfbptr; + dma_addr_t rx_bd_dma_base; }; enum gfar_irqinfo_id { @@ -1295,6 +1297,23 @@ static inline void gfar_clear_txbd_status(struct txbd8 *bdp) bdp->lstatus = cpu_to_be32(lstatus); } +static inline int gfar_rxbd_unused(struct gfar_priv_rx_q *rxq) +{ + if (rxq->next_to_clean > rxq->next_to_use) + return rxq->next_to_clean - rxq->next_to_use - 1; + + return rxq->rx_ring_size + rxq->next_to_clean - rxq->next_to_use - 1; +} + +static inline struct rxbd8 *gfar_rxbd_lastfree(struct gfar_priv_rx_q *rxq) +{ + int i; + + i = rxq->next_to_use ? rxq->next_to_use - 1 : rxq->rx_ring_size - 1; + + return &rxq->rx_bd_base[i]; +} + irqreturn_t gfar_receive(int irq, void *dev_id); int startup_gfar(struct net_device *dev); void stop_gfar(struct net_device *dev); diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index fda12fb32ec7..012fa4e79ffa 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -61,6 +61,8 @@ static void gfar_gdrvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo); static const char stat_gstrings[][ETH_GSTRING_LEN] = { + /* extra stats */ + "rx-allocation-errors", "rx-large-frame-errors", "rx-short-frame-errors", "rx-non-octet-errors", @@ -74,6 +76,7 @@ static const char stat_gstrings[][ETH_GSTRING_LEN] = { "tx-underrun-errors", "rx-skb-missing-errors", "tx-timeout-errors", + /* rmon stats */ "tx-rx-64-frames", "tx-rx-65-127-frames", "tx-rx-128-255-frames", -- 2.34.1