net: axienet: Use napi_alloc_skb when refilling RX ring
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 377c94e..c7eb05e 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -7,7 +7,7 @@
  * Copyright (c) 2008-2009 Secret Lab Technologies Ltd.
  * Copyright (c) 2010 - 2011 Michal Simek <monstr@monstr.eu>
  * Copyright (c) 2010 - 2011 PetaLogix
- * Copyright (c) 2019 SED Systems, a division of Calian Ltd.
+ * Copyright (c) 2019 - 2022 Calian Advanced Technologies
  * Copyright (c) 2010 - 2012 Xilinx, Inc. All rights reserved.
  *
  * This is a driver for the Xilinx Axi Ethernet which is used in the Virtex6
@@ -33,7 +33,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
 #include <linux/skbuff.h>
-#include <linux/spinlock.h>
+#include <linux/math64.h>
 #include <linux/phy.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
@@ -190,7 +190,7 @@ static void axienet_dma_bd_release(struct net_device *ndev)
        struct axienet_local *lp = netdev_priv(ndev);
 
        /* If we end up here, tx_bd_v must have been DMA allocated. */
-       dma_free_coherent(ndev->dev.parent,
+       dma_free_coherent(lp->dev,
                          sizeof(*lp->tx_bd_v) * lp->tx_bd_num,
                          lp->tx_bd_v,
                          lp->tx_bd_p);
@@ -215,18 +215,90 @@ static void axienet_dma_bd_release(struct net_device *ndev)
                 */
                if (lp->rx_bd_v[i].cntrl) {
                        phys = desc_get_phys_addr(lp, &lp->rx_bd_v[i]);
-                       dma_unmap_single(ndev->dev.parent, phys,
+                       dma_unmap_single(lp->dev, phys,
                                         lp->max_frm_size, DMA_FROM_DEVICE);
                }
        }
 
-       dma_free_coherent(ndev->dev.parent,
+       dma_free_coherent(lp->dev,
                          sizeof(*lp->rx_bd_v) * lp->rx_bd_num,
                          lp->rx_bd_v,
                          lp->rx_bd_p);
 }
 
 /**
+ * axienet_usec_to_timer - Calculate IRQ delay timer value
+ * @lp:                Pointer to the axienet_local structure
+ * @coalesce_usec: Microseconds to convert into timer value
+ */
+static u32 axienet_usec_to_timer(struct axienet_local *lp, u32 coalesce_usec)
+{
+       u32 result;
+       u64 clk_rate = 125000000; /* arbitrary guess if no clock rate set */
+
+       if (lp->axi_clk)
+               clk_rate = clk_get_rate(lp->axi_clk);
+
+       /* 1 Timeout Interval = 125 * (clock period of SG clock) */
+       result = DIV64_U64_ROUND_CLOSEST((u64)coalesce_usec * clk_rate,
+                                        (u64)125000000);
+       if (result > 255)
+               result = 255;
+
+       return result;
+}
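
Aside: a quick sanity check of the conversion above, not part of the patch. One timeout interval is 125 SG-clock periods, so at the 125 MHz fallback rate a tick is exactly 1 us and the value passes through unchanged; slower clocks scale it down. A minimal userspace sketch of the same arithmetic, with an assumed 100 MHz clock:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors axienet_usec_to_timer(): round to nearest, clamp to 8 bits */
    static uint32_t usec_to_timer(uint32_t usec, uint64_t clk_rate)
    {
            uint64_t ticks = (usec * clk_rate + 62500000ULL) / 125000000ULL;

            return ticks > 255 ? 255 : (uint32_t)ticks;
    }

    int main(void)
    {
            /* 50 us at 100 MHz: round(50 * 1e8 / 1.25e8) = 40 ticks */
            printf("%u\n", usec_to_timer(50, 100000000ULL));
            return 0;
    }
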
+
+/**
+ * axienet_dma_start - Set up DMA registers and start DMA operation
+ * @lp:                Pointer to the axienet_local structure
+ */
+static void axienet_dma_start(struct axienet_local *lp)
+{
+       u32 tx_cr;
+
+       /* Start updating the Rx channel control register */
+       lp->rx_dma_cr = (lp->coalesce_count_rx << XAXIDMA_COALESCE_SHIFT) |
+                       XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK;
+       /* Only set interrupt delay timer if not generating an interrupt on
+        * the first RX packet. Otherwise leave at 0 to disable delay interrupt.
+        */
+       if (lp->coalesce_count_rx > 1)
+               lp->rx_dma_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_rx)
+                                       << XAXIDMA_DELAY_SHIFT) |
+                                XAXIDMA_IRQ_DELAY_MASK;
+       axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr);
+
+       /* Start updating the Tx channel control register */
+       tx_cr = (lp->coalesce_count_tx << XAXIDMA_COALESCE_SHIFT) |
+               XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK;
+       /* Only set interrupt delay timer if not generating an interrupt on
+        * the first TX packet. Otherwise leave at 0 to disable delay interrupt.
+        */
+       if (lp->coalesce_count_tx > 1)
+               tx_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_tx)
+                               << XAXIDMA_DELAY_SHIFT) |
+                        XAXIDMA_IRQ_DELAY_MASK;
+       axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, tx_cr);
+
+       /* Populate the tail pointer and bring the Rx Axi DMA engine out of
+        * halted state. This will make the Rx side ready for reception.
+        */
+       axienet_dma_out_addr(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p);
+       lp->rx_dma_cr |= XAXIDMA_CR_RUNSTOP_MASK;
+       axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr);
+       axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
+                            (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1)));
+
+       /* Write to the RS (Run-stop) bit in the Tx channel control register.
+        * The Tx channel is now ready to run, but only after we write to the
+        * tail pointer register will it start transmitting.
+        */
+       axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p);
+       tx_cr |= XAXIDMA_CR_RUNSTOP_MASK;
+       axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, tx_cr);
+}
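
For reference, a sketch of the DMACR layout being programmed here, with field positions taken from the XAXIDMA_COALESCE_SHIFT and XAXIDMA_DELAY_SHIFT definitions in xilinx_axienet.h (an editor's illustration, not part of the patch):

    /*
     *  31          24 23            16 15                       0
     * +--------------+----------------+--------------------------+
     * |   IRQDelay   |  IRQThreshold  | RUNSTOP, IRQ enables, .. |  DMACR
     * +--------------+----------------+--------------------------+
     *
     * IRQThreshold: raise the IOC interrupt after this many completed BDs
     * IRQDelay:     raise the delay interrupt after this many idle timeout
     *               intervals (0 disables the delay timer)
     */
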
+
+/**
  * axienet_dma_bd_init - Setup buffer descriptor rings for Axi DMA
  * @ndev:      Pointer to the net_device structure
  *
@@ -238,7 +310,6 @@ static void axienet_dma_bd_release(struct net_device *ndev)
  */
 static int axienet_dma_bd_init(struct net_device *ndev)
 {
-       u32 cr;
        int i;
        struct sk_buff *skb;
        struct axienet_local *lp = netdev_priv(ndev);
@@ -249,13 +320,13 @@ static int axienet_dma_bd_init(struct net_device *ndev)
        lp->rx_bd_ci = 0;
 
        /* Allocate the Tx and Rx buffer descriptors. */
-       lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent,
+       lp->tx_bd_v = dma_alloc_coherent(lp->dev,
                                         sizeof(*lp->tx_bd_v) * lp->tx_bd_num,
                                         &lp->tx_bd_p, GFP_KERNEL);
        if (!lp->tx_bd_v)
                return -ENOMEM;
 
-       lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent,
+       lp->rx_bd_v = dma_alloc_coherent(lp->dev,
                                         sizeof(*lp->rx_bd_v) * lp->rx_bd_num,
                                         &lp->rx_bd_p, GFP_KERNEL);
        if (!lp->rx_bd_v)
@@ -285,9 +356,9 @@ static int axienet_dma_bd_init(struct net_device *ndev)
                        goto out;
 
                lp->rx_bd_v[i].skb = skb;
-               addr = dma_map_single(ndev->dev.parent, skb->data,
+               addr = dma_map_single(lp->dev, skb->data,
                                      lp->max_frm_size, DMA_FROM_DEVICE);
-               if (dma_mapping_error(ndev->dev.parent, addr)) {
+               if (dma_mapping_error(lp->dev, addr)) {
                        netdev_err(ndev, "DMA mapping error\n");
                        goto out;
                }
@@ -296,50 +367,7 @@ static int axienet_dma_bd_init(struct net_device *ndev)
                lp->rx_bd_v[i].cntrl = lp->max_frm_size;
        }
 
-       /* Start updating the Rx channel control register */
-       cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
-       /* Update the interrupt coalesce count */
-       cr = ((cr & ~XAXIDMA_COALESCE_MASK) |
-             ((lp->coalesce_count_rx) << XAXIDMA_COALESCE_SHIFT));
-       /* Update the delay timer count */
-       cr = ((cr & ~XAXIDMA_DELAY_MASK) |
-             (XAXIDMA_DFT_RX_WAITBOUND << XAXIDMA_DELAY_SHIFT));
-       /* Enable coalesce, delay timer and error interrupts */
-       cr |= XAXIDMA_IRQ_ALL_MASK;
-       /* Write to the Rx channel control register */
-       axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
-
-       /* Start updating the Tx channel control register */
-       cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
-       /* Update the interrupt coalesce count */
-       cr = (((cr & ~XAXIDMA_COALESCE_MASK)) |
-             ((lp->coalesce_count_tx) << XAXIDMA_COALESCE_SHIFT));
-       /* Update the delay timer count */
-       cr = (((cr & ~XAXIDMA_DELAY_MASK)) |
-             (XAXIDMA_DFT_TX_WAITBOUND << XAXIDMA_DELAY_SHIFT));
-       /* Enable coalesce, delay timer and error interrupts */
-       cr |= XAXIDMA_IRQ_ALL_MASK;
-       /* Write to the Tx channel control register */
-       axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
-
-       /* Populate the tail pointer and bring the Rx Axi DMA engine out of
-        * halted state. This will make the Rx side ready for reception.
-        */
-       axienet_dma_out_addr(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p);
-       cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
-       axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET,
-                         cr | XAXIDMA_CR_RUNSTOP_MASK);
-       axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
-                            (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1)));
-
-       /* Write to the RS (Run-stop) bit in the Tx channel control register.
-        * Tx channel is now ready to run. But only after we write to the
-        * tail pointer register that the Tx channel will start transmitting.
-        */
-       axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p);
-       cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
-       axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET,
-                         cr | XAXIDMA_CR_RUNSTOP_MASK);
+       axienet_dma_start(lp);
 
        return 0;
 out:
@@ -531,13 +559,51 @@ static int __axienet_device_reset(struct axienet_local *lp)
 }
 
 /**
+ * axienet_dma_stop - Stop DMA operation
+ * @lp:                Pointer to the axienet_local structure
+ */
+static void axienet_dma_stop(struct axienet_local *lp)
+{
+       int count;
+       u32 cr, sr;
+
+       cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
+       cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK);
+       axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
+       synchronize_irq(lp->rx_irq);
+
+       cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
+       cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK);
+       axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
+       synchronize_irq(lp->tx_irq);
+
+       /* Give DMAs a chance to halt gracefully */
+       sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET);
+       for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) {
+               msleep(20);
+               sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET);
+       }
+
+       sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
+       for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) {
+               msleep(20);
+               sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
+       }
+
+       /* Do a reset to ensure DMA is really stopped */
+       axienet_lock_mii(lp);
+       __axienet_device_reset(lp);
+       axienet_unlock_mii(lp);
+}
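
Aside: the two halt-wait loops could equivalently be expressed with read_poll_timeout() from <linux/iopoll.h>; a sketch with the same 5 x 20 ms budget (the patch keeps the explicit loops, and the dev_warn below is this sketch's addition):

    int ret;
    u32 sr;

    /* poll the RX status register every 20 ms, give up after ~100 ms */
    ret = read_poll_timeout(axienet_dma_in32, sr,
                            sr & XAXIDMA_SR_HALT_MASK,
                            20 * USEC_PER_MSEC, 100 * USEC_PER_MSEC,
                            false, lp, XAXIDMA_RX_SR_OFFSET);
    if (ret)
            dev_warn(lp->dev, "RX DMA did not halt before reset\n");
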
+
+/**
  * axienet_device_reset - Reset and initialize the Axi Ethernet hardware.
  * @ndev:      Pointer to the net_device structure
  *
  * This function is called to reset and initialize the Axi Ethernet core. This
  * is typically called during initialization. It does a reset of the Axi DMA
  * Rx/Tx channels and initializes the Axi DMA BDs. Since Axi DMA reset lines
- * areconnected to Axi Ethernet reset lines, this in turn resets the Axi
+ * are connected to Axi Ethernet reset lines, this in turn resets the Axi
  * Ethernet core. No separate hardware reset is done for the Axi Ethernet
  * core.
  * Returns 0 on success or a negative error number otherwise.
@@ -636,7 +702,7 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
                /* Ensure we see complete descriptor update */
                dma_rmb();
                phys = desc_get_phys_addr(lp, cur_p);
-               dma_unmap_single(ndev->dev.parent, phys,
+               dma_unmap_single(lp->dev, phys,
                                 (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
                                 DMA_TO_DEVICE);
 
@@ -774,9 +840,9 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                cur_p->app0 |= 2; /* Tx Full Checksum Offload Enabled */
        }
 
-       phys = dma_map_single(ndev->dev.parent, skb->data,
+       phys = dma_map_single(lp->dev, skb->data,
                              skb_headlen(skb), DMA_TO_DEVICE);
-       if (unlikely(dma_mapping_error(ndev->dev.parent, phys))) {
+       if (unlikely(dma_mapping_error(lp->dev, phys))) {
                if (net_ratelimit())
                        netdev_err(ndev, "TX DMA mapping error\n");
                ndev->stats.tx_dropped++;
@@ -790,11 +856,11 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                        lp->tx_bd_tail = 0;
                cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
                frag = &skb_shinfo(skb)->frags[ii];
-               phys = dma_map_single(ndev->dev.parent,
+               phys = dma_map_single(lp->dev,
                                      skb_frag_address(frag),
                                      skb_frag_size(frag),
                                      DMA_TO_DEVICE);
-               if (unlikely(dma_mapping_error(ndev->dev.parent, phys))) {
+               if (unlikely(dma_mapping_error(lp->dev, phys))) {
                        if (net_ratelimit())
                                netdev_err(ndev, "TX DMA mapping error\n");
                        ndev->stats.tx_dropped++;
@@ -833,79 +899,84 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 }
 
 /**
- * axienet_recv - Is called from Axi DMA Rx Isr to complete the received
- *               BD processing.
- * @ndev:      Pointer to net_device structure.
+ * axienet_poll - Triggered by RX ISR to complete the received BD processing.
+ * @napi:      Pointer to NAPI structure.
+ * @budget:    Max number of packets to process.
  *
- * This function is invoked from the Axi DMA Rx isr to process the Rx BDs. It
- * does minimal processing and invokes "netif_rx" to complete further
- * processing.
+ * Return: Number of RX packets processed.
  */
-static void axienet_recv(struct net_device *ndev)
+static int axienet_poll(struct napi_struct *napi, int budget)
 {
        u32 length;
        u32 csumstatus;
        u32 size = 0;
-       u32 packets = 0;
+       int packets = 0;
        dma_addr_t tail_p = 0;
-       struct axienet_local *lp = netdev_priv(ndev);
-       struct sk_buff *skb, *new_skb;
        struct axidma_bd *cur_p;
+       struct sk_buff *skb, *new_skb;
+       struct axienet_local *lp = container_of(napi, struct axienet_local, napi);
 
        cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
 
-       while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) {
+       while (packets < budget && (cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) {
                dma_addr_t phys;
 
-               tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
-
                /* Ensure we see complete descriptor update */
                dma_rmb();
-               phys = desc_get_phys_addr(lp, cur_p);
-               dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size,
-                                DMA_FROM_DEVICE);
 
                skb = cur_p->skb;
                cur_p->skb = NULL;
-               length = cur_p->app4 & 0x0000FFFF;
-
-               skb_put(skb, length);
-               skb->protocol = eth_type_trans(skb, ndev);
-               /*skb_checksum_none_assert(skb);*/
-               skb->ip_summed = CHECKSUM_NONE;
-
-               /* if we're doing Rx csum offload, set it up */
-               if (lp->features & XAE_FEATURE_FULL_RX_CSUM) {
-                       csumstatus = (cur_p->app2 &
-                                     XAE_FULL_CSUM_STATUS_MASK) >> 3;
-                       if ((csumstatus == XAE_IP_TCP_CSUM_VALIDATED) ||
-                           (csumstatus == XAE_IP_UDP_CSUM_VALIDATED)) {
-                               skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+               /* skb could be NULL if a previous pass already received the
+                * packet for this slot in the ring, but failed to refill it
+                * with a newly allocated buffer. In this case, don't try to
+                * receive it again.
+                */
+               if (likely(skb)) {
+                       length = cur_p->app4 & 0x0000FFFF;
+
+                       phys = desc_get_phys_addr(lp, cur_p);
+                       dma_unmap_single(lp->dev, phys, lp->max_frm_size,
+                                        DMA_FROM_DEVICE);
+
+                       skb_put(skb, length);
+                       skb->protocol = eth_type_trans(skb, lp->ndev);
+                       /*skb_checksum_none_assert(skb);*/
+                       skb->ip_summed = CHECKSUM_NONE;
+
+                       /* if we're doing Rx csum offload, set it up */
+                       if (lp->features & XAE_FEATURE_FULL_RX_CSUM) {
+                               csumstatus = (cur_p->app2 &
+                                             XAE_FULL_CSUM_STATUS_MASK) >> 3;
+                               if (csumstatus == XAE_IP_TCP_CSUM_VALIDATED ||
+                                   csumstatus == XAE_IP_UDP_CSUM_VALIDATED) {
+                                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                               }
+                       } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 &&
+                                  skb->protocol == htons(ETH_P_IP) &&
+                                  skb->len > 64) {
+                               skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF);
+                               skb->ip_summed = CHECKSUM_COMPLETE;
                        }
-               } else if ((lp->features & XAE_FEATURE_PARTIAL_RX_CSUM) != 0 &&
-                          skb->protocol == htons(ETH_P_IP) &&
-                          skb->len > 64) {
-                       skb->csum = be32_to_cpu(cur_p->app3 & 0xFFFF);
-                       skb->ip_summed = CHECKSUM_COMPLETE;
-               }
 
-               netif_rx(skb);
+                       napi_gro_receive(napi, skb);
 
-               size += length;
-               packets++;
+                       size += length;
+                       packets++;
+               }
 
-               new_skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size);
+               new_skb = napi_alloc_skb(napi, lp->max_frm_size);
                if (!new_skb)
-                       return;
+                       break;
 
-               phys = dma_map_single(ndev->dev.parent, new_skb->data,
+               phys = dma_map_single(lp->dev, new_skb->data,
                                      lp->max_frm_size,
                                      DMA_FROM_DEVICE);
-               if (unlikely(dma_mapping_error(ndev->dev.parent, phys))) {
+               if (unlikely(dma_mapping_error(lp->dev, phys))) {
                        if (net_ratelimit())
-                               netdev_err(ndev, "RX DMA mapping error\n");
+                               netdev_err(lp->ndev, "RX DMA mapping error\n");
                        dev_kfree_skb(new_skb);
-                       return;
+                       break;
                }
                desc_set_phys_addr(lp, phys, cur_p);
 
@@ -913,16 +984,30 @@ static void axienet_recv(struct net_device *ndev)
                cur_p->status = 0;
                cur_p->skb = new_skb;
 
+               /* Only update tail_p to mark this slot as usable after it has
+                * been successfully refilled.
+                */
+               tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
+
                if (++lp->rx_bd_ci >= lp->rx_bd_num)
                        lp->rx_bd_ci = 0;
                cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
        }
 
-       ndev->stats.rx_packets += packets;
-       ndev->stats.rx_bytes += size;
+       lp->ndev->stats.rx_packets += packets;
+       lp->ndev->stats.rx_bytes += size;
 
        if (tail_p)
                axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, tail_p);
+
+       if (packets < budget && napi_complete_done(napi, packets)) {
+               /* Re-enable RX completion interrupts. This should
+                * cause an immediate interrupt if any RX packets are
+                * already pending.
+                */
+               axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr);
+       }
+       return packets;
 }
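
The function above follows the standard NAPI contract; schematically (editor's summary, where enable_rx_irqs() is a placeholder for the rx_dma_cr write shown above):

    /* work = packets processed this poll, at most budget */
    if (work < budget && napi_complete_done(napi, work))
            enable_rx_irqs();   /* safe: NAPI is idle again */
    return work;                /* returning budget keeps NAPI scheduled */
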
 
 /**
@@ -937,41 +1022,27 @@ static void axienet_recv(struct net_device *ndev)
  */
 static irqreturn_t axienet_tx_irq(int irq, void *_ndev)
 {
-       u32 cr;
        unsigned int status;
        struct net_device *ndev = _ndev;
        struct axienet_local *lp = netdev_priv(ndev);
 
        status = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
-       if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) {
-               axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status);
-               axienet_start_xmit_done(lp->ndev);
-               goto out;
-       }
+
        if (!(status & XAXIDMA_IRQ_ALL_MASK))
                return IRQ_NONE;
-       if (status & XAXIDMA_IRQ_ERROR_MASK) {
-               dev_err(&ndev->dev, "DMA Tx error 0x%x\n", status);
-               dev_err(&ndev->dev, "Current BD is at: 0x%x%08x\n",
-                       (lp->tx_bd_v[lp->tx_bd_ci]).phys_msb,
-                       (lp->tx_bd_v[lp->tx_bd_ci]).phys);
-
-               cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
-               /* Disable coalesce, delay timer and error interrupts */
-               cr &= (~XAXIDMA_IRQ_ALL_MASK);
-               /* Write to the Tx channel control register */
-               axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
-
-               cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
-               /* Disable coalesce, delay timer and error interrupts */
-               cr &= (~XAXIDMA_IRQ_ALL_MASK);
-               /* Write to the Rx channel control register */
-               axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
 
+       axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status);
+
+       if (unlikely(status & XAXIDMA_IRQ_ERROR_MASK)) {
+               netdev_err(ndev, "DMA Tx error 0x%x\n", status);
+               netdev_err(ndev, "Current BD is at: 0x%x%08x\n",
+                          (lp->tx_bd_v[lp->tx_bd_ci]).phys_msb,
+                          (lp->tx_bd_v[lp->tx_bd_ci]).phys);
                schedule_work(&lp->dma_err_task);
-               axienet_dma_out32(lp, XAXIDMA_TX_SR_OFFSET, status);
+       } else {
+               axienet_start_xmit_done(lp->ndev);
        }
-out:
+
        return IRQ_HANDLED;
 }
 
@@ -982,46 +1053,40 @@ out:
  *
  * Return: IRQ_HANDLED if device generated a RX interrupt, IRQ_NONE otherwise.
  *
- * This is the Axi DMA Rx Isr. It invokes "axienet_recv" to complete the BD
+ * This is the Axi DMA Rx ISR. It invokes NAPI polling to complete the RX BD
  * processing.
  */
 static irqreturn_t axienet_rx_irq(int irq, void *_ndev)
 {
-       u32 cr;
        unsigned int status;
        struct net_device *ndev = _ndev;
        struct axienet_local *lp = netdev_priv(ndev);
 
        status = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET);
-       if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) {
-               axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status);
-               axienet_recv(lp->ndev);
-               goto out;
-       }
+
        if (!(status & XAXIDMA_IRQ_ALL_MASK))
                return IRQ_NONE;
-       if (status & XAXIDMA_IRQ_ERROR_MASK) {
-               dev_err(&ndev->dev, "DMA Rx error 0x%x\n", status);
-               dev_err(&ndev->dev, "Current BD is at: 0x%x%08x\n",
-                       (lp->rx_bd_v[lp->rx_bd_ci]).phys_msb,
-                       (lp->rx_bd_v[lp->rx_bd_ci]).phys);
-
-               cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
-               /* Disable coalesce, delay timer and error interrupts */
-               cr &= (~XAXIDMA_IRQ_ALL_MASK);
-               /* Finally write to the Tx channel control register */
-               axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
-
-               cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
-               /* Disable coalesce, delay timer and error interrupts */
-               cr &= (~XAXIDMA_IRQ_ALL_MASK);
-               /* write to the Rx channel control register */
-               axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
 
+       axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status);
+
+       if (unlikely(status & XAXIDMA_IRQ_ERROR_MASK)) {
+               netdev_err(ndev, "DMA Rx error 0x%x\n", status);
+               netdev_err(ndev, "Current BD is at: 0x%x%08x\n",
+                          (lp->rx_bd_v[lp->rx_bd_ci]).phys_msb,
+                          (lp->rx_bd_v[lp->rx_bd_ci]).phys);
                schedule_work(&lp->dma_err_task);
-               axienet_dma_out32(lp, XAXIDMA_RX_SR_OFFSET, status);
+       } else {
+               /* Disable further RX completion interrupts and schedule
+                * NAPI receive.
+                */
+               u32 cr = lp->rx_dma_cr;
+
+               cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK);
+               axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
+
+               napi_schedule(&lp->napi);
        }
-out:
+
        return IRQ_HANDLED;
 }
 
@@ -1095,6 +1160,8 @@ static int axienet_open(struct net_device *ndev)
        /* Enable worker thread for Axi DMA error handling */
        INIT_WORK(&lp->dma_err_task, axienet_dma_err_handler);
 
+       napi_enable(&lp->napi);
+
        /* Enable interrupts for Axi DMA Tx */
        ret = request_irq(lp->tx_irq, axienet_tx_irq, IRQF_SHARED,
                          ndev->name, ndev);
@@ -1120,6 +1187,7 @@ err_eth_irq:
 err_rx_irq:
        free_irq(lp->tx_irq, ndev);
 err_tx_irq:
+       napi_disable(&lp->napi);
        phylink_stop(lp->phylink);
        phylink_disconnect_phy(lp->phylink);
        cancel_work_sync(&lp->dma_err_task);
@@ -1139,46 +1207,22 @@ err_tx_irq:
  */
 static int axienet_stop(struct net_device *ndev)
 {
-       u32 cr, sr;
-       int count;
        struct axienet_local *lp = netdev_priv(ndev);
 
        dev_dbg(&ndev->dev, "axienet_close()\n");
 
+       napi_disable(&lp->napi);
+
        phylink_stop(lp->phylink);
        phylink_disconnect_phy(lp->phylink);
 
        axienet_setoptions(ndev, lp->options &
                           ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN));
 
-       cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
-       cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK);
-       axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
-
-       cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
-       cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK);
-       axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
+       axienet_dma_stop(lp);
 
        axienet_iow(lp, XAE_IE_OFFSET, 0);
 
-       /* Give DMAs a chance to halt gracefully */
-       sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET);
-       for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) {
-               msleep(20);
-               sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET);
-       }
-
-       sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
-       for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) {
-               msleep(20);
-               sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET);
-       }
-
-       /* Do a reset to ensure DMA is really stopped */
-       axienet_lock_mii(lp);
-       __axienet_device_reset(lp);
-       axienet_unlock_mii(lp);
-
        cancel_work_sync(&lp->dma_err_task);
 
        if (lp->eth_irq > 0)
@@ -1449,14 +1493,12 @@ axienet_ethtools_get_coalesce(struct net_device *ndev,
                              struct kernel_ethtool_coalesce *kernel_coal,
                              struct netlink_ext_ack *extack)
 {
-       u32 regval = 0;
        struct axienet_local *lp = netdev_priv(ndev);
-       regval = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
-       ecoalesce->rx_max_coalesced_frames = (regval & XAXIDMA_COALESCE_MASK)
-                                            >> XAXIDMA_COALESCE_SHIFT;
-       regval = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
-       ecoalesce->tx_max_coalesced_frames = (regval & XAXIDMA_COALESCE_MASK)
-                                            >> XAXIDMA_COALESCE_SHIFT;
+
+       ecoalesce->rx_max_coalesced_frames = lp->coalesce_count_rx;
+       ecoalesce->rx_coalesce_usecs = lp->coalesce_usec_rx;
+       ecoalesce->tx_max_coalesced_frames = lp->coalesce_count_tx;
+       ecoalesce->tx_coalesce_usecs = lp->coalesce_usec_tx;
        return 0;
 }
 
@@ -1489,8 +1531,12 @@ axienet_ethtools_set_coalesce(struct net_device *ndev,
 
        if (ecoalesce->rx_max_coalesced_frames)
                lp->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames;
+       if (ecoalesce->rx_coalesce_usecs)
+               lp->coalesce_usec_rx = ecoalesce->rx_coalesce_usecs;
        if (ecoalesce->tx_max_coalesced_frames)
                lp->coalesce_count_tx = ecoalesce->tx_max_coalesced_frames;
+       if (ecoalesce->tx_coalesce_usecs)
+               lp->coalesce_usec_tx = ecoalesce->tx_coalesce_usecs;
 
        return 0;
 }
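
With both parameter families supported, coalescing is now tunable from userspace in the usual way, e.g. "ethtool -C eth0 rx-frames 16 rx-usecs 50" (illustrative values) and read back with "ethtool -c eth0". Note that a value of 0 is treated as "leave unchanged" by the handler above.
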
@@ -1521,7 +1567,8 @@ static int axienet_ethtools_nway_reset(struct net_device *dev)
 }
 
 static const struct ethtool_ops axienet_ethtool_ops = {
-       .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES,
+       .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES |
+                                    ETHTOOL_COALESCE_USECS,
        .get_drvinfo    = axienet_ethtools_get_drvinfo,
        .get_regs_len   = axienet_ethtools_get_regs_len,
        .get_regs       = axienet_ethtools_get_regs,
@@ -1537,78 +1584,78 @@ static const struct ethtool_ops axienet_ethtool_ops = {
        .nway_reset     = axienet_ethtools_nway_reset,
 };
 
-static void axienet_mac_pcs_get_state(struct phylink_config *config,
-                                     struct phylink_link_state *state)
+static struct axienet_local *pcs_to_axienet_local(struct phylink_pcs *pcs)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
-       struct axienet_local *lp = netdev_priv(ndev);
+       return container_of(pcs, struct axienet_local, pcs);
+}
 
-       switch (state->interface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               phylink_mii_c22_pcs_get_state(lp->pcs_phy, state);
-               break;
-       default:
-               break;
-       }
+static void axienet_pcs_get_state(struct phylink_pcs *pcs,
+                                 struct phylink_link_state *state)
+{
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+
+       phylink_mii_c22_pcs_get_state(pcs_phy, state);
 }
 
-static void axienet_mac_an_restart(struct phylink_config *config)
+static void axienet_pcs_an_restart(struct phylink_pcs *pcs)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
-       struct axienet_local *lp = netdev_priv(ndev);
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
 
-       phylink_mii_c22_pcs_an_restart(lp->pcs_phy);
+       phylink_mii_c22_pcs_an_restart(pcs_phy);
 }
 
-static int axienet_mac_prepare(struct phylink_config *config, unsigned int mode,
-                              phy_interface_t iface)
+static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
+                             phy_interface_t interface,
+                             const unsigned long *advertising,
+                             bool permit_pause_to_mac)
 {
-       struct net_device *ndev = to_net_dev(config->dev);
+       struct mdio_device *pcs_phy = pcs_to_axienet_local(pcs)->pcs_phy;
+       struct net_device *ndev = pcs_to_axienet_local(pcs)->ndev;
        struct axienet_local *lp = netdev_priv(ndev);
        int ret;
 
-       switch (iface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               if (!lp->switch_x_sgmii)
-                       return 0;
-
-               ret = mdiobus_write(lp->pcs_phy->bus,
-                                   lp->pcs_phy->addr,
-                                   XLNX_MII_STD_SELECT_REG,
-                                   iface == PHY_INTERFACE_MODE_SGMII ?
+       if (lp->switch_x_sgmii) {
+               ret = mdiodev_write(pcs_phy, XLNX_MII_STD_SELECT_REG,
+                                   interface == PHY_INTERFACE_MODE_SGMII ?
                                        XLNX_MII_STD_SELECT_SGMII : 0);
-               if (ret < 0)
-                       netdev_warn(ndev, "Failed to switch PHY interface: %d\n",
+               if (ret < 0) {
+                       netdev_warn(ndev,
+                                   "Failed to switch PHY interface: %d\n",
                                    ret);
-               return ret;
-       default:
-               return 0;
+                       return ret;
+               }
        }
+
+       ret = phylink_mii_c22_pcs_config(pcs_phy, mode, interface, advertising);
+       if (ret < 0)
+               netdev_warn(ndev, "Failed to configure PCS: %d\n", ret);
+
+       return ret;
 }
 
-static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
-                              const struct phylink_link_state *state)
+static const struct phylink_pcs_ops axienet_pcs_ops = {
+       .pcs_get_state = axienet_pcs_get_state,
+       .pcs_config = axienet_pcs_config,
+       .pcs_an_restart = axienet_pcs_an_restart,
+};
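
Schematically, phylink now drives the PCS directly through this ops table, reached via axienet_mac_select_pcs() below (editor's summary):

    /* phylink core                          axienet
     *   mac_select_pcs(SGMII/1000BASE-X) -> &lp->pcs
     *   pcs_config()                     -> phylink_mii_c22_pcs_config()
     *   pcs_get_state()                  -> phylink_mii_c22_pcs_get_state()
     *   pcs_an_restart()                 -> phylink_mii_c22_pcs_an_restart()
     */
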
+
+static struct phylink_pcs *axienet_mac_select_pcs(struct phylink_config *config,
+                                                 phy_interface_t interface)
 {
        struct net_device *ndev = to_net_dev(config->dev);
        struct axienet_local *lp = netdev_priv(ndev);
-       int ret;
 
-       switch (state->interface) {
-       case PHY_INTERFACE_MODE_SGMII:
-       case PHY_INTERFACE_MODE_1000BASEX:
-               ret = phylink_mii_c22_pcs_config(lp->pcs_phy, mode,
-                                                state->interface,
-                                                state->advertising);
-               if (ret < 0)
-                       netdev_warn(ndev, "Failed to configure PCS: %d\n",
-                                   ret);
-               break;
+       if (interface == PHY_INTERFACE_MODE_1000BASEX ||
+           interface == PHY_INTERFACE_MODE_SGMII)
+               return &lp->pcs;
 
-       default:
-               break;
-       }
+       return NULL;
+}
+
+static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
+                              const struct phylink_link_state *state)
+{
+       /* nothing meaningful to do */
 }
 
 static void axienet_mac_link_down(struct phylink_config *config,
@@ -1663,9 +1710,7 @@ static void axienet_mac_link_up(struct phylink_config *config,
 
 static const struct phylink_mac_ops axienet_phylink_ops = {
        .validate = phylink_generic_validate,
-       .mac_pcs_get_state = axienet_mac_pcs_get_state,
-       .mac_an_restart = axienet_mac_an_restart,
-       .mac_prepare = axienet_mac_prepare,
+       .mac_select_pcs = axienet_mac_select_pcs,
        .mac_config = axienet_mac_config,
        .mac_link_down = axienet_mac_link_down,
        .mac_link_up = axienet_mac_link_up,
@@ -1680,29 +1725,26 @@ static const struct phylink_mac_ops axienet_phylink_ops = {
  */
 static void axienet_dma_err_handler(struct work_struct *work)
 {
+       u32 i;
        u32 axienet_status;
-       u32 cr, i;
+       struct axidma_bd *cur_p;
        struct axienet_local *lp = container_of(work, struct axienet_local,
                                                dma_err_task);
        struct net_device *ndev = lp->ndev;
-       struct axidma_bd *cur_p;
+
+       napi_disable(&lp->napi);
 
        axienet_setoptions(ndev, lp->options &
                           ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN));
-       /* When we do an Axi Ethernet reset, it resets the complete core
-        * including the MDIO. MDIO must be disabled before resetting.
-        * Hold MDIO bus lock to avoid MDIO accesses during the reset.
-        */
-       axienet_lock_mii(lp);
-       __axienet_device_reset(lp);
-       axienet_unlock_mii(lp);
+
+       axienet_dma_stop(lp);
 
        for (i = 0; i < lp->tx_bd_num; i++) {
                cur_p = &lp->tx_bd_v[i];
                if (cur_p->cntrl) {
                        dma_addr_t addr = desc_get_phys_addr(lp, cur_p);
 
-                       dma_unmap_single(ndev->dev.parent, addr,
+                       dma_unmap_single(lp->dev, addr,
                                         (cur_p->cntrl &
                                          XAXIDMA_BD_CTRL_LENGTH_MASK),
                                         DMA_TO_DEVICE);
@@ -1735,50 +1777,7 @@ static void axienet_dma_err_handler(struct work_struct *work)
        lp->tx_bd_tail = 0;
        lp->rx_bd_ci = 0;
 
-       /* Start updating the Rx channel control register */
-       cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
-       /* Update the interrupt coalesce count */
-       cr = ((cr & ~XAXIDMA_COALESCE_MASK) |
-             (XAXIDMA_DFT_RX_THRESHOLD << XAXIDMA_COALESCE_SHIFT));
-       /* Update the delay timer count */
-       cr = ((cr & ~XAXIDMA_DELAY_MASK) |
-             (XAXIDMA_DFT_RX_WAITBOUND << XAXIDMA_DELAY_SHIFT));
-       /* Enable coalesce, delay timer and error interrupts */
-       cr |= XAXIDMA_IRQ_ALL_MASK;
-       /* Finally write to the Rx channel control register */
-       axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr);
-
-       /* Start updating the Tx channel control register */
-       cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
-       /* Update the interrupt coalesce count */
-       cr = (((cr & ~XAXIDMA_COALESCE_MASK)) |
-             (XAXIDMA_DFT_TX_THRESHOLD << XAXIDMA_COALESCE_SHIFT));
-       /* Update the delay timer count */
-       cr = (((cr & ~XAXIDMA_DELAY_MASK)) |
-             (XAXIDMA_DFT_TX_WAITBOUND << XAXIDMA_DELAY_SHIFT));
-       /* Enable coalesce, delay timer and error interrupts */
-       cr |= XAXIDMA_IRQ_ALL_MASK;
-       /* Finally write to the Tx channel control register */
-       axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
-
-       /* Populate the tail pointer and bring the Rx Axi DMA engine out of
-        * halted state. This will make the Rx side ready for reception.
-        */
-       axienet_dma_out_addr(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p);
-       cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET);
-       axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET,
-                         cr | XAXIDMA_CR_RUNSTOP_MASK);
-       axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
-                            (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1)));
-
-       /* Write to the RS (Run-stop) bit in the Tx channel control register.
-        * Tx channel is now ready to run. But only after we write to the
-        * tail pointer register that the Tx channel will start transmitting
-        */
-       axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p);
-       cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET);
-       axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET,
-                         cr | XAXIDMA_CR_RUNSTOP_MASK);
+       axienet_dma_start(lp);
 
        axienet_status = axienet_ior(lp, XAE_RCW1_OFFSET);
        axienet_status &= ~XAE_RCW1_RX_MASK;
@@ -1799,6 +1798,7 @@ static void axienet_dma_err_handler(struct work_struct *work)
        axienet_set_mac_address(ndev, NULL);
        axienet_set_multicast_list(ndev);
        axienet_setoptions(ndev, lp->options);
+       napi_enable(&lp->napi);
 }
 
 /**
@@ -1847,6 +1847,8 @@ static int axienet_probe(struct platform_device *pdev)
        lp->rx_bd_num = RX_BD_NUM_DEFAULT;
        lp->tx_bd_num = TX_BD_NUM_DEFAULT;
 
+       netif_napi_add(ndev, &lp->napi, axienet_poll, NAPI_POLL_WEIGHT);
+
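
For orientation, the NAPI lifecycle wired up by this change (editor's summary of the hunks above):

    /* probe:            netif_napi_add()  - register lp->napi / axienet_poll()
     * open:             napi_enable()     - before the DMA IRQs are requested
     * stop:             napi_disable()    - before the DMA engines are halted
     * dma_err_handler:  napi_disable() ... napi_enable() around the reset
     */
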
        lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk");
        if (!lp->axi_clk) {
                /* For backward compatibility, if named AXI clock is not present,
@@ -2053,7 +2055,9 @@ static int axienet_probe(struct platform_device *pdev)
        }
 
        lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
+       lp->coalesce_usec_rx = XAXIDMA_DFT_RX_USEC;
        lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
+       lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC;
 
        /* Reset core now that clocks are enabled, prior to accessing MDIO */
        ret = __axienet_device_reset(lp);
@@ -2079,12 +2083,12 @@ static int axienet_probe(struct platform_device *pdev)
                        ret = -EPROBE_DEFER;
                        goto cleanup_mdio;
                }
-               lp->phylink_config.pcs_poll = true;
+               lp->pcs.ops = &axienet_pcs_ops;
+               lp->pcs.poll = true;
        }
 
        lp->phylink_config.dev = &ndev->dev;
        lp->phylink_config.type = PHYLINK_NETDEV;
-       lp->phylink_config.legacy_pre_march2020 = true;
        lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
                MAC_10FD | MAC_100FD | MAC_1000FD;