net: ena: Fix DMA syncing in XDP path when SWIOTLB is on
authorDavid Arinzon <darinzon@amazon.com>
Mon, 11 Dec 2023 06:28:00 +0000 (06:28 +0000)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 20 Dec 2023 16:01:49 +0000 (17:01 +0100)
[ Upstream commit d760117060cf2e90b5c59c5492cab179a4dbce01 ]

This patch fixes two issues:

Issue 1
-------
Description
```````````
Current code does not call dma_sync_single_for_cpu() to sync data from
the device side memory to the CPU side memory before the XDP code path
uses the CPU side data.
This causes the XDP code path to read the unset garbage data in the CPU
side memory, resulting in incorrect handling of the packet by XDP.

Solution
````````
1. Add a call to dma_sync_single_for_cpu() before the XDP code starts to
   use the data in the CPU side memory.
2. The XDP code verdict can be XDP_PASS, in which case there is a
   fallback to the non-XDP code, which also calls
   dma_sync_single_for_cpu().
   To avoid calling dma_sync_single_for_cpu() twice:
2.1. Put the dma_sync_single_for_cpu() in the code in such a place where
     it happens before XDP and non-XDP code.
2.2. Remove the calls to dma_sync_single_for_cpu() in the non-XDP code
     for the first buffer only (rx_copybreak and non-rx_copybreak
     cases), since the new call that was added covers these cases.
     The call to dma_sync_single_for_cpu() for the second buffer and on
     stays because only the first buffer is handled by the newly added
     dma_sync_single_for_cpu(). And there is no need for special
     handling of the second buffer and on for the XDP path since
     currently the driver supports only single buffer packets.

Issue 2
-------
Description
```````````
In case the XDP code forwarded the packet (ENA_XDP_FORWARDED),
ena_unmap_rx_buff_attrs() is called with attrs set to 0.
This means that before unmapping the buffer, the internal function
dma_unmap_page_attrs() will also call dma_sync_single_for_cpu() on
the whole buffer (not only on the data part of it).
This sync is both wasteful (since a sync was already explicitly
called before) and also causes a bug, which will be explained
using the below diagram.

The following diagram shows the flow of events causing the bug.
The order of events is (1)-(4) as shown in the diagram.

CPU side memory area

     (3)convert_to_xdp_frame() initializes the
        headroom with xdpf metadata
                      ||
                      \/
          ___________________________________
         |                                   |
 0       |                                   V                       4K
 ---------------------------------------------------------------------
 | xdpf->data      | other xdpf       |   < data >   | tailroom ||...|
 |                 | fields           |              | GARBAGE  ||   |
 ---------------------------------------------------------------------

                   /\                        /\
                   ||                        ||
   (4)ena_unmap_rx_buff_attrs() calls     (2)dma_sync_single_for_cpu()
      dma_sync_single_for_cpu() on the       copies data from device
      whole buffer page, overwriting         side to CPU side memory
      the xdpf->data with GARBAGE.           ||
 0                                                                   4K
 ---------------------------------------------------------------------
 | headroom                           |   < data >   | tailroom ||...|
 | GARBAGE                            |              | GARBAGE  ||   |
 ---------------------------------------------------------------------

Device side memory area                      /\
                                             ||
                               (1) device writes RX packet data

After the call to ena_unmap_rx_buff_attrs() in (4), the xdpf->data
becomes corrupted, and so when it is later accessed in
ena_clean_xdp_irq()->xdp_return_frame(), it causes a page fault,
crashing the kernel.

Solution
````````
Explicitly tell ena_unmap_rx_buff_attrs() not to call
dma_sync_single_for_cpu() by passing it the ENA_DMA_ATTR_SKIP_CPU_SYNC
flag.

Fixes: f7d625adeb7b ("net: ena: Add dynamic recycling mechanism for rx buffers")
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David Arinzon <darinzon@amazon.com>
Link: https://lore.kernel.org/r/20231211062801.27891-4-darinzon@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/net/ethernet/amazon/ena/ena_netdev.c

index b638e1d..14e41eb 100644 (file)
@@ -1493,11 +1493,6 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
                if (unlikely(!skb))
                        return NULL;
 
-               /* sync this buffer for CPU use */
-               dma_sync_single_for_cpu(rx_ring->dev,
-                                       dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
-                                       len,
-                                       DMA_FROM_DEVICE);
                skb_copy_to_linear_data(skb, buf_addr + buf_offset, len);
                dma_sync_single_for_device(rx_ring->dev,
                                           dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
@@ -1516,17 +1511,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 
        buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom);
 
-       pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr);
-
        /* If XDP isn't loaded try to reuse part of the RX buffer */
        reuse_rx_buf_page = !is_xdp_loaded &&
                            ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset);
 
-       dma_sync_single_for_cpu(rx_ring->dev,
-                               pre_reuse_paddr + pkt_offset,
-                               len,
-                               DMA_FROM_DEVICE);
-
        if (!reuse_rx_buf_page)
                ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC);
 
@@ -1723,6 +1711,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
        int xdp_flags = 0;
        int total_len = 0;
        int xdp_verdict;
+       u8 pkt_offset;
        int rc = 0;
        int i;
 
@@ -1749,13 +1738,19 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
 
                /* First descriptor might have an offset set by the device */
                rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
-               rx_info->buf_offset += ena_rx_ctx.pkt_offset;
+               pkt_offset = ena_rx_ctx.pkt_offset;
+               rx_info->buf_offset += pkt_offset;
 
                netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
                          "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
                          rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
                          ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
 
+               dma_sync_single_for_cpu(rx_ring->dev,
+                                       dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
+                                       rx_ring->ena_bufs[0].len,
+                                       DMA_FROM_DEVICE);
+
                if (ena_xdp_present_ring(rx_ring))
                        xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs);
 
@@ -1781,7 +1776,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
                                if (xdp_verdict & ENA_XDP_FORWARDED) {
                                        ena_unmap_rx_buff_attrs(rx_ring,
                                                                &rx_ring->rx_buffer_info[req_id],
-                                                               0);
+                                                               DMA_ATTR_SKIP_CPU_SYNC);
                                        rx_ring->rx_buffer_info[req_id].page = NULL;
                                }
                        }