sfc: support passing a representor to the EF100 TX path
author	Edward Cree <ecree.xilinx@gmail.com>
Wed, 20 Jul 2022 18:33:47 +0000 (19:33 +0100)
committer	David S. Miller <davem@davemloft.net>
Fri, 22 Jul 2022 11:50:06 +0000 (12:50 +0100)
A non-null efv in __ef100_enqueue_skb() indicates that the packet is
 from that representor, should be transmitted with a suitable option
 descriptor (to instruct the switch to deliver it to the representee),
 and should not be accounted to the parent PF's stats or BQL.

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
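
For context, a minimal sketch of how a representor's ndo_start_xmit might hand
a packet to this path. This caller is not part of this patch (the actual
hook-up lands separately); the function name and the queue selection are
hypothetical, though netdev_priv(), efx_get_channel() and
efx_channel_get_tx_queue() are existing sfc helpers, and locking against the
PF's own xmit path is elided here.

/* Hypothetical caller sketch, not part of this patch.  A representor
 * passes itself as 'efv' so that __ef100_enqueue_skb() emits the
 * egress-mport override descriptor ahead of the packet.
 */
static netdev_tx_t efx_ef100_rep_xmit_sketch(struct sk_buff *skb,
					     struct net_device *dev)
{
	struct efx_rep *efv = netdev_priv(dev);
	struct efx_nic *efx = efv->parent;	/* parent PF */
	struct efx_tx_queue *tx_queue;
	unsigned int len = skb->len;	/* sample before TX owns the skb */

	/* Queue choice is illustrative only: channel 0, first TX queue */
	tx_queue = efx_channel_get_tx_queue(efx_get_channel(efx, 0), 0);
	if (__ef100_enqueue_skb(tx_queue, skb, efv)) {
		/* skb already freed and efv->stats.tx_errors incremented;
		 * representors never assert flow control, so still report
		 * success to the stack.
		 */
		return NETDEV_TX_OK;
	}
	atomic64_inc(&efv->stats.tx_packets);
	atomic64_add(len, &efv->stats.tx_bytes);
	return NETDEV_TX_OK;
}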
drivers/net/ethernet/sfc/ef100_rep.h
drivers/net/ethernet/sfc/ef100_tx.c
drivers/net/ethernet/sfc/ef100_tx.h
drivers/net/ethernet/sfc/net_driver.h
drivers/net/ethernet/sfc/tx.c
drivers/net/ethernet/sfc/tx_common.c
drivers/net/ethernet/sfc/tx_common.h

index 1d17aaf..d47fd8f 100644
 
 #include "net_driver.h"
 
+struct efx_rep_sw_stats {
+       atomic64_t rx_packets, tx_packets;
+       atomic64_t rx_bytes, tx_bytes;
+       atomic64_t rx_dropped, tx_errors;
+};
+
 /**
  * struct efx_rep - Private data for an Efx representor
  *
@@ -24,6 +30,7 @@
  * @mport: m-port ID of corresponding VF
  * @idx: VF index
  * @list: entry on efx->vf_reps
+ * @stats: software traffic counters for netdev stats
  */
 struct efx_rep {
        struct efx_nic *parent;
@@ -32,6 +39,7 @@ struct efx_rep {
        u32 mport;
        unsigned int idx;
        struct list_head list;
+       struct efx_rep_sw_stats stats;
 };
 
 int efx_ef100_vfrep_create(struct efx_nic *efx, unsigned int i);
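
A later patch reads these counters back for the stack; purely as a sketch of
the intended consumption (the function name below is hypothetical, the fields
are the ones added above):

/* Sketch: fold the representor's software counters into rtnl stats.
 * atomic64_read() pairs with the atomic64_inc()/atomic64_add() updates
 * on the datapath, so no extra locking is needed.
 */
static void efx_ef100_rep_stats64_sketch(struct net_device *dev,
					 struct rtnl_link_stats64 *stats)
{
	struct efx_rep *efv = netdev_priv(dev);

	stats->rx_packets = atomic64_read(&efv->stats.rx_packets);
	stats->tx_packets = atomic64_read(&efv->stats.tx_packets);
	stats->rx_bytes = atomic64_read(&efv->stats.rx_bytes);
	stats->tx_bytes = atomic64_read(&efv->stats.tx_bytes);
	stats->rx_dropped = atomic64_read(&efv->stats.rx_dropped);
	stats->tx_errors = atomic64_read(&efv->stats.tx_errors);
}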
index 26ef51d..102ddc7 100644
@@ -254,7 +254,8 @@ static void ef100_make_tso_desc(struct efx_nic *efx,
 
 static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
                                      const struct sk_buff *skb,
-                                     unsigned int segment_count)
+                                     unsigned int segment_count,
+                                     struct efx_rep *efv)
 {
        unsigned int old_write_count = tx_queue->write_count;
        unsigned int new_write_count = old_write_count;
@@ -272,6 +273,20 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
        else
                next_desc_type = ESE_GZ_TX_DESC_TYPE_SEND;
 
+       if (unlikely(efv)) {
+               /* Create TX override descriptor */
+               write_ptr = new_write_count & tx_queue->ptr_mask;
+               txd = ef100_tx_desc(tx_queue, write_ptr);
+               ++new_write_count;
+
+               tx_queue->packet_write_count = new_write_count;
+               EFX_POPULATE_OWORD_3(*txd,
+                                    ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_PREFIX,
+                                    ESF_GZ_TX_PREFIX_EGRESS_MPORT, efv->mport,
+                                    ESF_GZ_TX_PREFIX_EGRESS_MPORT_EN, 1);
+               nr_descs--;
+       }
+
        /* if it's a raw write (such as XDP) then always SEND single frames */
        if (!skb)
                nr_descs = 1;
@@ -306,6 +321,9 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
                /* if it's a raw write (such as XDP) then always SEND */
                next_desc_type = skb ? ESE_GZ_TX_DESC_TYPE_SEG :
                                       ESE_GZ_TX_DESC_TYPE_SEND;
+               /* mark as an EFV buffer if applicable */
+               if (unlikely(efv))
+                       buffer->flags |= EFX_TX_BUF_EFV;
 
        } while (new_write_count != tx_queue->insert_count);
 
@@ -324,7 +342,7 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
 
 void ef100_tx_write(struct efx_tx_queue *tx_queue)
 {
-       ef100_tx_make_descriptors(tx_queue, NULL, 0);
+       ef100_tx_make_descriptors(tx_queue, NULL, 0, NULL);
        ef100_tx_push_buffers(tx_queue);
 }
 
@@ -351,6 +369,12 @@ void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event)
  */
 int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 {
+       return __ef100_enqueue_skb(tx_queue, skb, NULL);
+}
+
+int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+                       struct efx_rep *efv)
+{
        unsigned int old_insert_count = tx_queue->insert_count;
        struct efx_nic *efx = tx_queue->efx;
        bool xmit_more = netdev_xmit_more();
@@ -376,16 +400,64 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
                        return 0;
        }
 
+       if (unlikely(efv)) {
+               struct efx_tx_buffer *buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
+
+               /* Drop representor packets if the queue is stopped.
+                * We currently don't assert backoff to representors so this is
+                * to make sure representor traffic can't starve the main
+                * net device.
+                * And, of course, if there are no TX descriptors left.
+                */
+               if (netif_tx_queue_stopped(tx_queue->core_txq) ||
+                   unlikely(efx_tx_buffer_in_use(buffer))) {
+                       atomic64_inc(&efv->stats.tx_errors);
+                       rc = -ENOSPC;
+                       goto err;
+               }
+
+               /* Also drop representor traffic if it could cause us to
+                * stop the queue. If we assert backoff and we haven't
+                * received traffic on the main net device recently then the
+                * TX watchdog can go off erroneously.
+                */
+               fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
+               fill_level += efx_tx_max_skb_descs(efx);
+               if (fill_level > efx->txq_stop_thresh) {
+                       struct efx_tx_queue *txq2;
+
+                       /* Refresh cached fill level and re-check */
+                       efx_for_each_channel_tx_queue(txq2, tx_queue->channel)
+                               txq2->old_read_count = READ_ONCE(txq2->read_count);
+
+                       fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
+                       fill_level += efx_tx_max_skb_descs(efx);
+                       if (fill_level > efx->txq_stop_thresh) {
+                               atomic64_inc(&efv->stats.tx_errors);
+                               rc = -ENOSPC;
+                               goto err;
+                       }
+               }
+
+               buffer->flags = EFX_TX_BUF_OPTION | EFX_TX_BUF_EFV;
+               tx_queue->insert_count++;
+       }
+
        /* Map for DMA and create descriptors */
        rc = efx_tx_map_data(tx_queue, skb, segments);
        if (rc)
                goto err;
-       ef100_tx_make_descriptors(tx_queue, skb, segments);
+       ef100_tx_make_descriptors(tx_queue, skb, segments, efv);
 
        fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
        if (fill_level > efx->txq_stop_thresh) {
                struct efx_tx_queue *txq2;
 
+               /* Because of checks above, representor traffic should
+                * not be able to stop the queue.
+                */
+               WARN_ON(efv);
+
                netif_tx_stop_queue(tx_queue->core_txq);
                /* Re-read after a memory barrier in case we've raced with
                 * the completion path. Otherwise there's a danger we'll never
@@ -404,8 +476,12 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
        /* If xmit_more then we don't need to push the doorbell, unless there
         * are 256 descriptors already queued in which case we have to push to
         * ensure we never push more than 256 at once.
+        *
+        * Always push for representor traffic, and don't account it to parent
+        * PF netdevice's BQL.
         */
-       if (__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
+       if (unlikely(efv) ||
+           __netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
            tx_queue->write_count - tx_queue->notify_count > 255)
                ef100_tx_push_buffers(tx_queue);
 
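
The drop-rather-than-stop logic above reuses the queue's existing
check/refresh/re-check idiom: test against a cached consumer index first,
and only on apparent overflow refresh the cache and re-test. A generic
sketch of that pattern (types and fields are illustrative, not the driver's):

struct ring_sketch {
	unsigned int insert_count;	/* producer index */
	unsigned int read_count;	/* consumer index, written on completion */
	unsigned int old_read_count;	/* producer's possibly-stale cache */
	unsigned int stop_thresh;
};

static bool ring_would_overfill(struct ring_sketch *r, unsigned int needed)
{
	unsigned int fill = r->insert_count - r->old_read_count;

	if (fill + needed <= r->stop_thresh)
		return false;
	/* The cache can only overstate the fill level: refresh from the
	 * completion path and retry before deciding to drop.
	 */
	r->old_read_count = READ_ONCE(r->read_count);
	fill = r->insert_count - r->old_read_count;
	return fill + needed > r->stop_thresh;
}

This is also why representor traffic skips __netdev_tx_sent_queue(): its
completions are deliberately excluded from BQL accounting (see the
tx_common.c changes below), so enqueuing its bytes would leave BQL
permanently out of balance.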
index ddc4b98..e9e1154 100644
@@ -13,6 +13,7 @@
 #define EFX_EF100_TX_H
 
 #include "net_driver.h"
+#include "ef100_rep.h"
 
 int ef100_tx_probe(struct efx_tx_queue *tx_queue);
 void ef100_tx_init(struct efx_tx_queue *tx_queue);
@@ -22,4 +23,6 @@ unsigned int ef100_tx_max_skb_descs(struct efx_nic *efx);
 void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event);
 
 netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
+int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+                       struct efx_rep *efv);
 #endif
index 037cfa1..4cde54c 100644
@@ -178,6 +178,7 @@ struct efx_tx_buffer {
 #define EFX_TX_BUF_OPTION      0x10    /* empty buffer for option descriptor */
 #define EFX_TX_BUF_XDP         0x20    /* buffer was sent with XDP */
 #define EFX_TX_BUF_TSO_V3      0x40    /* empty buffer for a TSO_V3 descriptor */
+#define EFX_TX_BUF_EFV         0x100   /* buffer was sent from representor */
 
 /**
  * struct efx_tx_queue - An Efx TX queue
index 79cc0bb..d124740 100644
@@ -559,6 +559,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
 void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
 {
        unsigned int pkts_compl = 0, bytes_compl = 0;
+       unsigned int efv_pkts_compl = 0;
        unsigned int read_ptr;
        bool finished = false;
 
@@ -580,7 +581,8 @@ void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
                /* Need to check the flag before dequeueing. */
                if (buffer->flags & EFX_TX_BUF_SKB)
                        finished = true;
-               efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+               efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
+                                  &efv_pkts_compl);
 
                ++tx_queue->read_count;
                read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@@ -589,7 +591,7 @@ void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
        tx_queue->pkts_compl += pkts_compl;
        tx_queue->bytes_compl += bytes_compl;
 
-       EFX_WARN_ON_PARANOID(pkts_compl != 1);
+       EFX_WARN_ON_PARANOID(pkts_compl + efv_pkts_compl != 1);
 
        efx_xmit_done_check_empty(tx_queue);
 }
index 658ea2d..67e789b 100644
@@ -109,9 +109,11 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
        /* Free any buffers left in the ring */
        while (tx_queue->read_count != tx_queue->write_count) {
                unsigned int pkts_compl = 0, bytes_compl = 0;
+               unsigned int efv_pkts_compl = 0;
 
                buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
-               efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+               efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
+                                  &efv_pkts_compl);
 
                ++tx_queue->read_count;
        }
@@ -146,7 +148,8 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
                        struct efx_tx_buffer *buffer,
                        unsigned int *pkts_compl,
-                       unsigned int *bytes_compl)
+                       unsigned int *bytes_compl,
+                       unsigned int *efv_pkts_compl)
 {
        if (buffer->unmap_len) {
                struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
@@ -164,9 +167,15 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
        if (buffer->flags & EFX_TX_BUF_SKB) {
                struct sk_buff *skb = (struct sk_buff *)buffer->skb;
 
-               EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
-               (*pkts_compl)++;
-               (*bytes_compl) += skb->len;
+               if (unlikely(buffer->flags & EFX_TX_BUF_EFV)) {
+                       EFX_WARN_ON_PARANOID(!efv_pkts_compl);
+                       (*efv_pkts_compl)++;
+               } else {
+                       EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
+                       (*pkts_compl)++;
+                       (*bytes_compl) += skb->len;
+               }
+
                if (tx_queue->timestamping &&
                    (tx_queue->completed_timestamp_major ||
                     tx_queue->completed_timestamp_minor)) {
@@ -199,7 +208,8 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
                                unsigned int index,
                                unsigned int *pkts_compl,
-                               unsigned int *bytes_compl)
+                               unsigned int *bytes_compl,
+                               unsigned int *efv_pkts_compl)
 {
        struct efx_nic *efx = tx_queue->efx;
        unsigned int stop_index, read_ptr;
@@ -218,7 +228,8 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
                        return;
                }
 
-               efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
+               efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl,
+                                  efv_pkts_compl);
 
                ++tx_queue->read_count;
                read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@@ -241,15 +252,17 @@ void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue)
 void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 {
        unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
+       unsigned int efv_pkts_compl = 0;
        struct efx_nic *efx = tx_queue->efx;
 
        EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
 
-       efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
+       efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl,
+                           &efv_pkts_compl);
        tx_queue->pkts_compl += pkts_compl;
        tx_queue->bytes_compl += bytes_compl;
 
-       if (pkts_compl > 1)
+       if (pkts_compl + efv_pkts_compl > 1)
                ++tx_queue->merge_events;
 
        /* See if we need to restart the netif queue.  This memory
@@ -274,6 +287,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
                        unsigned int insert_count)
 {
+       unsigned int efv_pkts_compl = 0;
        struct efx_tx_buffer *buffer;
        unsigned int bytes_compl = 0;
        unsigned int pkts_compl = 0;
@@ -282,7 +296,8 @@ void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
        while (tx_queue->insert_count != insert_count) {
                --tx_queue->insert_count;
                buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
-               efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+               efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
+                                  &efv_pkts_compl);
        }
 }
 
index bbab7f2..d87aecb 100644
@@ -19,7 +19,8 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue);
 void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
                        struct efx_tx_buffer *buffer,
                        unsigned int *pkts_compl,
-                       unsigned int *bytes_compl);
+                       unsigned int *bytes_compl,
+                       unsigned int *efv_pkts_compl);
 
 static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer)
 {