net: piggy back on the memory barrier in bql when waking queues
author Jakub Kicinski <kuba@kernel.org>
Fri, 7 Apr 2023 01:25:36 +0000 (18:25 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Tue, 11 Apr 2023 00:56:18 +0000 (17:56 -0700)
Drivers call netdev_tx_completed_queue() right before
netif_txq_maybe_wake(). If BQL is enabled, netdev_tx_completed_queue()
should issue a memory barrier, so we can depend on that separating
the stop check from the consumer index update, instead of adding
another barrier in netif_txq_maybe_wake().
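
To illustrate the pairing this depends on (ring->cons and
free_desc() below are made-up names, not part of this patch):

    CPU0 (xmit, stopping)           CPU1 (completion, waking)
    ---------------------           -------------------------
    netif_tx_stop_queue(txq)        ring->cons = new_cons
    smp_mb__after_atomic()          smp_mb()  /* BQL barrier */
    re-check free_desc(ring)        check netif_tx_queue_stopped(txq)

Each side publishes its write before reading the other side's
state, so at least one of them sees the other's update and the
queue can't be left stopped forever.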

This matters more than the barriers on the xmit path, because
the wake condition is almost always true. So we issue the
consumer side barrier often.

Wrap netdev_tx_completed_queue() in a local helper to issue
the barrier even if BQL is disabled. Keep the same semantics
as netdev_tx_completed_queue() (barrier only if bytes != 0)
to make it clear that the barrier is conditional.
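
In other words, callers may assume the following (sketch, not
part of the patch itself):

    netdev_txq_completed_mb(txq, 0, 0);        /* barrier may be skipped */
    netdev_txq_completed_mb(txq, pkts, bytes); /* bytes != 0: smp_mb(),
                                                * with or without BQL */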

Plus, since the macro now gets pkt/byte counts as arguments,
we can skip waking if there were no packets completed.
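
For reference, a converted completion handler would look roughly
like this (drv_ring, free_desc() and DRV_WAKE_THRESH are
hypothetical; the bnxt and ixgbe hunks below are the real
conversions):

    static void drv_tx_done(struct drv_ring *ring,
                            unsigned int pkts, unsigned int bytes)
    {
            struct netdev_queue *txq = ring->txq;

            ring->cons = ring->next_cons;   /* publish consumer index */

            /* Reports pkts/bytes to BQL; the barrier it implies
             * orders the index update above against the stopped
             * check inside. Skips the wake when pkts == 0.
             */
            netif_txq_completed_wake(txq, pkts, bytes,
                                     free_desc(ring), DRV_WAKE_THRESH);
    }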

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
include/linux/netdevice.h
include/net/netdev_queues.h

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index de97bee..f7602d8 100644
@@ -688,11 +688,11 @@ next_tx_int:
                dev_kfree_skb_any(skb);
        }
 
-       netdev_tx_completed_queue(txq, nr_pkts, tx_bytes);
        txr->tx_cons = cons;
 
-       __netif_txq_maybe_wake(txq, bnxt_tx_avail(bp, txr), bp->tx_wake_thresh,
-                              READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING);
+       __netif_txq_completed_wake(txq, nr_pkts, tx_bytes,
+                                  bnxt_tx_avail(bp, txr), bp->tx_wake_thresh,
+                                  READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING);
 }
 
 static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index cbbddee..f2604fc 100644
@@ -1251,15 +1251,13 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
        if (ring_is_xdp(tx_ring))
                return !!budget;
 
-       netdev_tx_completed_queue(txring_txq(tx_ring),
-                                 total_packets, total_bytes);
-
 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
        txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
-       if (total_packets && netif_carrier_ok(tx_ring->netdev) &&
-           !__netif_txq_maybe_wake(txq, ixgbe_desc_unused(tx_ring),
-                                   TX_WAKE_THRESHOLD,
-                                   test_bit(__IXGBE_DOWN, &adapter->state)))
+       if (!__netif_txq_completed_wake(txq, total_packets, total_bytes,
+                                       ixgbe_desc_unused(tx_ring),
+                                       TX_WAKE_THRESHOLD,
+                                       !netif_carrier_ok(tx_ring->netdev) ||
+                                       test_bit(__IXGBE_DOWN, &adapter->state)))
                ++tx_ring->tx_stats.restart_queue;
 
        return !!budget;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7bec9a2..fe35559 100644
@@ -3532,7 +3532,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
         * netdev_tx_sent_queue will miss the update and cause the queue to
         * be stopped forever
         */
-       smp_mb();
+       smp_mb(); /* NOTE: netdev_txq_completed_mb() assumes this exists */
 
        if (unlikely(dql_avail(&dev_queue->dql) < 0))
                return;
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 5236d78..b26fdb4 100644
@@ -38,7 +38,7 @@
                netif_tx_stop_queue(txq);                               \
                /* Producer index and stop bit must be visible          \
                 * to consumer before we recheck.                       \
-                * Pairs with a barrier in __netif_txq_maybe_wake().    \
+                * Pairs with a barrier in __netif_txq_completed_wake(). \
                 */                                                     \
                smp_mb__after_atomic();                                 \
                                                                        \
                _res;                                                   \
        })                                                              \
 
+/* Variant of netdev_tx_completed_queue() which guarantees smp_mb() if
+ * @bytes != 0, regardless of kernel config.
+ */
+static inline void
+netdev_txq_completed_mb(struct netdev_queue *dev_queue,
+                       unsigned int pkts, unsigned int bytes)
+{
+       if (IS_ENABLED(CONFIG_BQL))
+               netdev_tx_completed_queue(dev_queue, pkts, bytes);
+       else if (bytes)
+               smp_mb();
+}
 
 /**
- * __netif_txq_maybe_wake() - locklessly wake a Tx queue, if needed
+ * __netif_txq_completed_wake() - locklessly wake a Tx queue, if needed
  * @txq:       struct netdev_queue to stop/start
+ * @pkts:      number of packets completed
+ * @bytes:     number of bytes completed
  * @get_desc:  get current number of free descriptors (see requirements below!)
  * @start_thrs:        minimal number of descriptors to re-enable the queue
  * @down_cond: down condition, predicate indicating that the queue should
  *             not be woken up if true
  *
  * All arguments may be evaluated multiple times.
  * @get_desc must be a formula or a function call, it must always
  * return up-to-date information when evaluated!
+ * Reports completed pkts/bytes to BQL.
  *
  * Returns:
  *      0 if the queue was woken up
  *      1 if the queue was already enabled (or disabled but @down_cond is true)
  *     -1 if the queue was left unchanged (@start_thrs not reached)
  */
-#define __netif_txq_maybe_wake(txq, get_desc, start_thrs, down_cond)   \
+#define __netif_txq_completed_wake(txq, pkts, bytes,                   \
+                                  get_desc, start_thrs, down_cond)     \
        ({                                                              \
                int _res;                                               \
                                                                        \
+               /* Report to BQL and piggy back on its barrier.         \
+                * Barrier makes sure that anybody stopping the queue   \
+                * after this point sees the new consumer index.        \
+                * Pairs with barrier in netif_txq_try_stop().          \
+                */                                                     \
+               netdev_txq_completed_mb(txq, pkts, bytes);              \
+                                                                       \
                _res = -1;                                              \
-               if (likely(get_desc > start_thrs)) {                    \
-                       /* Make sure that anybody stopping the queue after \
-                        * this sees the new next_to_clean.             \
-                        */                                             \
-                       smp_mb();                                       \
+               if (pkts && likely(get_desc > start_thrs)) {            \
                        _res = 1;                                       \
                        if (unlikely(netif_tx_queue_stopped(txq)) &&    \
                            !(down_cond)) {                             \
                                netif_tx_wake_queue(txq);               \
                                _res = 0;                               \
                        }                                               \
                }                                                       \
                _res;                                                   \
        })
 
-#define netif_txq_maybe_wake(txq, get_desc, start_thrs)                \
-       __netif_txq_maybe_wake(txq, get_desc, start_thrs, false)
+#define netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs) \
+       __netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs, false)
 
 /* subqueue variants follow */