ibmvnic: Ensure that device queue memory is cache-line aligned
Author: Dwip N. Banerjee <dnbanerg@us.ibm.com>
Thu, 19 Nov 2020 01:12:22 +0000 (19:12 -0600)
Committer: Jakub Kicinski <kuba@kernel.org>
Sat, 21 Nov 2020 03:50:34 +0000 (19:50 -0800)
PCI bus slowdowns were observed on IBM VNIC devices as a result
of partial cache-line writes and non-cache-aligned full cache-line writes.
Ensure that packet data buffers are cache-line aligned to avoid these
slowdowns.

Signed-off-by: Dwip N. Banerjee <dnbanerg@us.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h

index e9b0cb6..85df91c 100644 (file)
@@ -498,7 +498,7 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
 
                if (rx_pool->buff_size != buff_size) {
                        free_long_term_buff(adapter, &rx_pool->long_term_buff);
-                       rx_pool->buff_size = buff_size;
+                       rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
                        rc = alloc_long_term_buff(adapter,
                                                  &rx_pool->long_term_buff,
                                                  rx_pool->size *
@@ -592,7 +592,7 @@ static int init_rx_pools(struct net_device *netdev)
 
                rx_pool->size = adapter->req_rx_add_entries_per_subcrq;
                rx_pool->index = i;
-               rx_pool->buff_size = buff_size;
+               rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
                rx_pool->active = 1;
 
                rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int),
@@ -745,6 +745,7 @@ static int init_tx_pools(struct net_device *netdev)
 {
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
        int tx_subcrqs;
+       u64 buff_size;
        int i, rc;
 
        tx_subcrqs = adapter->num_active_tx_scrqs;
@@ -761,9 +762,11 @@ static int init_tx_pools(struct net_device *netdev)
        adapter->num_active_tx_pools = tx_subcrqs;
 
        for (i = 0; i < tx_subcrqs; i++) {
+               buff_size = adapter->req_mtu + VLAN_HLEN;
+               buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
                rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
                                      adapter->req_tx_entries_per_subcrq,
-                                     adapter->req_mtu + VLAN_HLEN);
+                                     buff_size);
                if (rc) {
                        release_tx_pools(adapter);
                        return rc;
index 16d892c..9911d92 100644 (file)
@@ -883,7 +883,7 @@ struct ibmvnic_sub_crq_queue {
        atomic_t used;
        char name[32];
        u64 handle;
-};
+} ____cacheline_aligned;
 
 struct ibmvnic_long_term_buff {
        unsigned char *buff;
@@ -907,7 +907,7 @@ struct ibmvnic_tx_pool {
        struct ibmvnic_long_term_buff long_term_buff;
        int num_buffers;
        int buf_size;
-};
+} ____cacheline_aligned;
 
 struct ibmvnic_rx_buff {
        struct sk_buff *skb;
@@ -928,7 +928,7 @@ struct ibmvnic_rx_pool {
        int next_alloc;
        int active;
        struct ibmvnic_long_term_buff long_term_buff;
-};
+} ____cacheline_aligned;
 
 struct ibmvnic_vpd {
        unsigned char *buff;
@@ -1015,8 +1015,8 @@ struct ibmvnic_adapter {
        atomic_t running_cap_crqs;
        bool wait_capability;
 
-       struct ibmvnic_sub_crq_queue **tx_scrq;
-       struct ibmvnic_sub_crq_queue **rx_scrq;
+       struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned;
+       struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned;
 
        /* rx structs */
        struct napi_struct *napi;