1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
4 * Copyright (C) 2015-2021 Google, Inc.
8 #include "gve_adminq.h"
10 #include <linux/etherdevice.h>
12 static void gve_rx_free_buffer(struct device *dev,
13 struct gve_rx_slot_page_info *page_info,
14 union gve_rx_data_slot *data_slot)
16 dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
17 GVE_DATA_SLOT_ADDR_PAGE_MASK);
19 gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
/* Release the per-slot buffers of an Rx ring and free its page_info
 * array.  Raw-addressing (RDA) mode frees each slot's page back to the
 * DMA layer; otherwise the queue's claim on its queue page list (QPL)
 * is dropped via gve_unassign_qpl().
 * NOTE(review): this listing elides some lines (the else branch around
 * gve_unassign_qpl and the loop-index declaration).
 */
22 static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
24 if (rx->data.raw_addressing) {
/* rx->mask + 1 gives the slot count of the ring. */
25 u32 slots = rx->mask + 1;
28 for (i = 0; i < slots; i++)
29 gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
30 &rx->data.data_ring[i]);
/* QPL mode: pages belong to the registered page list, so only the
 * queue's assignment of the QPL is released here.
 */
32 gve_unassign_qpl(priv, rx->data.qpl->id);
/* kvzalloc'd in gve_prefill_rx_pages(); NULL it so a later teardown
 * cannot double-free.
 */
35 kvfree(rx->data.page_info);
36 rx->data.page_info = NULL;
/* Tear down one Rx ring: detach it from its notify block, then free the
 * descriptor ring, the queue resources, every slot buffer, and finally
 * the data ring itself.
 * NOTE(review): the declaration of 'bytes' and the trailing data_bus
 * argument of the last dma_free_coherent appear elided in this listing.
 */
39 static void gve_rx_free_ring(struct gve_priv *priv, int idx)
41 struct gve_rx_ring *rx = &priv->rx[idx];
42 struct device *dev = &priv->pdev->dev;
43 u32 slots = rx->mask + 1;
/* Stop the ring being serviced from its interrupt/NAPI block first. */
46 gve_rx_remove_from_block(priv, idx);
48 bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
49 dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
50 rx->desc.desc_ring = NULL;
52 dma_free_coherent(dev, sizeof(*rx->q_resources),
53 rx->q_resources, rx->q_resources_bus);
54 rx->q_resources = NULL;
/* Buffers must be released before the data ring they point into. */
56 gve_rx_unfill_pages(priv, rx);
58 bytes = sizeof(*rx->data.data_ring) * slots;
59 dma_free_coherent(dev, bytes, rx->data.data_ring,
61 rx->data.data_ring = NULL;
62 netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
65 static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
66 dma_addr_t addr, struct page *page, __be64 *slot_addr)
68 page_info->page = page;
69 page_info->page_offset = 0;
70 page_info->page_address = page_address(page);
71 *slot_addr = cpu_to_be64(addr);
/* Allocate and DMA-map a fresh page for one Rx slot, then install it in
 * @page_info and the device-visible @data_slot.
 * NOTE(review): the declarations of 'page'/'dma'/'err', the error-check
 * after gve_alloc_page(), and the return statements are elided from
 * this listing; presumably returns 0 on success or a negative errno.
 */
74 static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
75 struct gve_rx_slot_page_info *page_info,
76 union gve_rx_data_slot *data_slot)
82 err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
86 gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
/* Fill every slot of an Rx ring with a packet buffer.
 *
 * QPL mode: each slot references a page of the assigned queue page
 * list, addressed by its byte offset (i * PAGE_SIZE) within the QPL.
 * Raw-addressing mode: a page is allocated and DMA-mapped per slot via
 * gve_rx_alloc_buffer(), with already-filled slots freed again on a
 * mid-loop failure.
 * NOTE(review): declarations, error returns, and the success return
 * (presumably the number of filled slots) are elided in this listing.
 */
90 static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
92 struct gve_priv *priv = rx->gve;
97 /* Allocate one page per Rx queue slot. Each page is split into two
98 * packet buffers, when possible we "page flip" between the two.
100 slots = rx->mask + 1;
102 rx->data.page_info = kvzalloc(slots *
103 sizeof(*rx->data.page_info), GFP_KERNEL);
104 if (!rx->data.page_info)
/* QPL mode needs a queue page list assigned before slots can be set. */
107 if (!rx->data.raw_addressing) {
108 rx->data.qpl = gve_assign_rx_qpl(priv);
/* QPL assignment failed: undo the page_info allocation. */
110 kvfree(rx->data.page_info);
111 rx->data.page_info = NULL;
115 for (i = 0; i < slots; i++) {
116 if (!rx->data.raw_addressing) {
117 struct page *page = rx->data.qpl->pages[i];
118 dma_addr_t addr = i * PAGE_SIZE;
/* QPL slots store an offset into the page list, not a DMA address. */
120 gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
121 &rx->data.data_ring[i].qpl_offset);
124 err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
125 &rx->data.data_ring[i]);
/* Unwind path: free buffers already allocated for earlier slots. */
133 gve_rx_free_buffer(&priv->pdev->dev,
134 &rx->data.page_info[i],
135 &rx->data.data_ring[i]);
/* Allocate one Rx ring: data ring, per-slot buffers, queue resources,
 * and descriptor ring, then attach it to its notify block.  The unwind
 * labels free resources in reverse order of acquisition (goto-cleanup
 * idiom).
 * NOTE(review): several lines are elided in this listing — local
 * declarations, dma_alloc_coherent trailing arguments, early returns,
 * the memory barrier after prefill, and the success return.
 */
139 static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
141 struct gve_rx_ring *rx = &priv->rx[idx];
142 struct device *hdev = &priv->pdev->dev;
148 netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
149 /* Make sure everything is zeroed to start with */
150 memset(rx, 0, sizeof(*rx));
/* slot count comes from device config; mask implies a power-of-two ring */
155 slots = priv->rx_data_slot_cnt;
156 rx->mask = slots - 1;
157 rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
159 /* alloc rx data ring */
160 bytes = sizeof(*rx->data.data_ring) * slots;
161 rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
164 if (!rx->data.data_ring)
166 filled_pages = gve_prefill_rx_pages(rx);
167 if (filled_pages < 0) {
169 goto abort_with_slots;
171 rx->fill_cnt = filled_pages;
172 /* Ensure data ring slots (packet buffers) are visible. */
175 /* Alloc gve_queue_resources */
177 dma_alloc_coherent(hdev,
178 sizeof(*rx->q_resources),
179 &rx->q_resources_bus,
181 if (!rx->q_resources) {
185 netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
186 (unsigned long)rx->data.data_bus);
188 /* alloc rx desc ring */
189 bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
190 npages = bytes / PAGE_SIZE;
/* Descriptor ring must be an exact multiple of PAGE_SIZE. */
191 if (npages * PAGE_SIZE != bytes) {
193 goto abort_with_q_resources;
196 rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
198 if (!rx->desc.desc_ring) {
200 goto abort_with_q_resources;
/* Doorbell when fewer than half the descriptors remain posted. */
203 rx->db_threshold = priv->rx_desc_cnt / 2;
205 gve_rx_add_to_block(priv, idx);
/* Unwind: free in reverse order of allocation. */
209 abort_with_q_resources:
210 dma_free_coherent(hdev, sizeof(*rx->q_resources),
211 rx->q_resources, rx->q_resources_bus);
212 rx->q_resources = NULL;
214 gve_rx_unfill_pages(priv, rx);
216 bytes = sizeof(*rx->data.data_ring) * slots;
217 dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
218 rx->data.data_ring = NULL;
/* Allocate all configured Rx rings; on failure free the rings that were
 * already allocated (indices 0..i-1) before propagating the error.
 * NOTE(review): local declarations, the error check after the alloc
 * call, and the return statements are elided in this listing.
 */
223 int gve_rx_alloc_rings(struct gve_priv *priv)
228 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
229 err = gve_rx_alloc_ring(priv, i);
231 netif_err(priv, drv, priv->dev,
232 "Failed to alloc rx ring=%d: err=%d\n",
237 /* Unallocate if there was an error */
/* Only rings before the failing index 'i' were successfully created. */
241 for (j = 0; j < i; j++)
242 gve_rx_free_ring(priv, j);
247 void gve_rx_free_rings_gqi(struct gve_priv *priv)
251 for (i = 0; i < priv->rx_cfg.num_queues; i++)
252 gve_rx_free_ring(priv, i);
255 void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
257 u32 db_idx = be32_to_cpu(rx->q_resources->db_index);
259 iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
262 static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
264 if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
265 return PKT_HASH_TYPE_L4;
266 if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
267 return PKT_HASH_TYPE_L3;
268 return PKT_HASH_TYPE_L2;
/* Attach the packet buffer as a page fragment to a GRO frag skb.  The
 * fragment starts past the device's GVE_RX_PAD prefix and the slot's
 * current half-page offset; truesize is the half-page buffer size.
 * NOTE(review): a 'len' parameter, the NULL check on napi_get_frags(),
 * and the return statement are elided in this listing.
 */
271 static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
272 struct gve_rx_slot_page_info *page_info,
275 struct sk_buff *skb = napi_get_frags(napi);
280 skb_add_rx_frag(skb, 0, page_info->page,
281 page_info->page_offset +
282 GVE_RX_PAD, len, PAGE_SIZE / 2);
287 static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
289 const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);
291 /* "flip" to other packet buffer on this page */
292 page_info->page_offset ^= PAGE_SIZE / 2;
293 *(slot_addr) ^= offset;
296 static bool gve_rx_can_flip_buffers(struct net_device *netdev)
298 return PAGE_SIZE == 4096
299 ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
/* Decide whether a buffer page can be reused, based on its refcount:
 * a count of 1 means only the driver holds it (reusable); >= 2 means
 * an SKB still references it.  A count below 1 indicates refcounting
 * went wrong and triggers a WARN.
 * NOTE(review): the 'pagecount == 1' branch and the return statements
 * (presumably 1 / 0 / an error value) are elided in this listing.
 */
302 static int gve_rx_can_recycle_buffer(struct page *page)
304 int pagecount = page_count(page);
306 /* This page is not being used by any SKBs - reuse */
309 /* This page is still being used by an SKB - we can't reuse */
310 else if (pagecount >= 2)
312 WARN(pagecount < 1, "Pagecount should never be < 1");
/* Build an skb for a packet in raw-addressing (RDA) mode by attaching
 * the buffer page as a fragment, then take an extra page reference so
 * the refill path can detect (via page_count) whether the stack has
 * released the page.
 * NOTE(review): the 'skb' declaration, the NULL check after
 * gve_rx_add_frags(), and the return statement are elided here; the
 * dev/netdev/data_slot parameters' use is not visible in this listing.
 */
316 static struct sk_buff *
317 gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
318 struct gve_rx_slot_page_info *page_info, u16 len,
319 struct napi_struct *napi,
320 union gve_rx_data_slot *data_slot)
324 skb = gve_rx_add_frags(napi, page_info, len);
328 /* Optimistically stop the kernel from freeing the page by increasing
329 * the page bias. We will check the refcount in refill to determine if
330 * we need to alloc a new page.
332 get_page(page_info->page);
/* Build an skb for a packet received into a QPL (registered-segment)
 * buffer.  If the slot may flip, the buffer is handed up as a fragment
 * (with an extra page ref) and the slot flips to the other half-page;
 * otherwise the payload is copied out so the registered page can be
 * reposted immediately, and the copied-skb counter is bumped.
 * NOTE(review): the 'skb' declaration, the NULL check guarding the
 * flip, the else structure, the stats field incremented at line 362,
 * and the return statement are elided in this listing.
 */
337 static struct sk_buff *
338 gve_rx_qpl(struct device *dev, struct net_device *netdev,
339 struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
340 u16 len, struct napi_struct *napi,
341 union gve_rx_data_slot *data_slot)
345 /* if raw_addressing mode is not enabled gvnic can only receive into
346 * registered segments. If the buffer can't be recycled, our only
347 * choice is to copy the data out of it so that we can return it to the
350 if (page_info->can_flip) {
351 skb = gve_rx_add_frags(napi, page_info, len);
352 /* No point in recycling if we didn't get the skb */
354 /* Make sure that the page isn't freed. */
355 get_page(page_info->page);
/* Point the device at the other half of the page for the next packet. */
356 gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
359 skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
361 u64_stats_update_begin(&rx->statss);
363 u64_stats_update_end(&rx->statss);
/* Process one received packet described by @rx_desc at ring slot @idx:
 * drop on descriptor error, sync the buffer for CPU access, build an
 * skb (copy for small packets, otherwise zero-copy via the RDA or QPL
 * path), fill in checksum/RSS metadata, and hand it to NAPI/GRO.
 * Returns bool (per the signature) — presumably false when the packet
 * is dropped; the return statements are elided in this listing, as are
 * several declarations, braces, and error-path lines.
 */
369 static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
370 netdev_features_t feat, u32 idx)
372 struct gve_rx_slot_page_info *page_info;
373 struct gve_priv *priv = rx->gve;
374 struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
375 struct net_device *dev = priv->dev;
376 union gve_rx_data_slot *data_slot;
377 struct sk_buff *skb = NULL;
381 /* drop this packet */
382 if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
383 u64_stats_update_begin(&rx->statss);
384 rx->rx_desc_err_dropped_pkt++;
385 u64_stats_update_end(&rx->statss);
/* Device prepends GVE_RX_PAD bytes; strip them from the length. */
389 len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
390 page_info = &rx->data.page_info[idx];
392 data_slot = &rx->data.data_ring[idx];
/* RDA slots carry the DMA address (flag bits masked off); QPL slots
 * are looked up through the page list's bus-address table.
 */
393 page_bus = (rx->data.raw_addressing) ?
394 be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
395 rx->data.qpl->page_buses[idx];
396 dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
397 PAGE_SIZE, DMA_FROM_DEVICE);
399 if (len <= priv->rx_copybreak) {
400 /* Just copy small packets */
401 skb = gve_rx_copy(dev, napi, page_info, len, GVE_RX_PAD);
402 u64_stats_update_begin(&rx->statss);
404 rx->rx_copybreak_pkt++;
405 u64_stats_update_end(&rx->statss);
407 u8 can_flip = gve_rx_can_flip_buffers(dev);
411 recycle = gve_rx_can_recycle_buffer(page_info->page);
/* NOTE(review): an elided error check precedes this — a recycle
 * failure in QPL mode presumably schedules a device reset.
 */
413 if (!rx->data.raw_addressing)
414 gve_schedule_reset(priv);
419 page_info->can_flip = can_flip && recycle;
420 if (rx->data.raw_addressing) {
421 skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
422 page_info, len, napi,
425 skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
426 page_info, len, napi, data_slot);
/* skb allocation failed: count it (drop path lines elided). */
431 u64_stats_update_begin(&rx->statss);
432 rx->rx_skb_alloc_fail++;
433 u64_stats_update_end(&rx->statss);
437 if (likely(feat & NETIF_F_RXCSUM)) {
438 /* NIC passes up the partial sum */
440 skb->ip_summed = CHECKSUM_COMPLETE;
442 skb->ip_summed = CHECKSUM_NONE;
443 skb->csum = csum_unfold(rx_desc->csum);
446 /* parse flags & pass relevant info up */
447 if (likely(feat & NETIF_F_RXHASH) &&
448 gve_needs_rss(rx_desc->flags_seq))
449 skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
450 gve_rss_type(rx_desc->flags_seq));
/* Fragmented skbs were built with napi_get_frags and must go through
 * napi_gro_frags; linear (copied) skbs use napi_gro_receive.
 */
452 if (skb_is_nonlinear(skb))
453 napi_gro_frags(napi);
455 napi_gro_receive(napi, skb);
/* Return true if the next descriptor the driver would process has been
 * written by the device, i.e. its sequence number matches the expected
 * ring sequence number.
 * NOTE(review): the declarations of 'next_idx'/'flags_seq' and the
 * barrier implied by the "synchronized" comment are elided in this
 * listing.
 */
459 static bool gve_rx_work_pending(struct gve_rx_ring *rx)
461 struct gve_rx_desc *desc;
465 next_idx = rx->cnt & rx->mask;
466 desc = rx->desc.desc_ring + next_idx;
/* Snapshot flags_seq once so the comparison uses a stable value. */
468 flags_seq = desc->flags_seq;
469 /* Make sure we have synchronized the seq no with the device */
472 return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
/* Repost buffers until the ring is full (fill_cnt - cnt == slot count),
 * reusing pages where possible: flip to the pre-approved other
 * half-page, recycle a page whose refcount shows the stack is done with
 * it, or free-and-reallocate otherwise.  Used only in raw-addressing
 * mode (the QPL path refills inline in gve_clean_rx_done).
 * NOTE(review): loop-exit/bail-out lines, the fill_cnt increment, and
 * the return statements are elided in this listing; returns bool —
 * presumably false when an allocation fails mid-refill.
 */
475 static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
477 int refill_target = rx->mask + 1;
478 u32 fill_cnt = rx->fill_cnt;
480 while (fill_cnt - rx->cnt < refill_target) {
481 struct gve_rx_slot_page_info *page_info;
482 u32 idx = fill_cnt & rx->mask;
484 page_info = &rx->data.page_info[idx];
485 if (page_info->can_flip) {
486 /* The other half of the page is free because it was
487 * free when we processed the descriptor. Flip to it.
489 union gve_rx_data_slot *data_slot =
490 &rx->data.data_ring[idx];
492 gve_rx_flip_buff(page_info, &data_slot->addr);
/* can_flip is single-use; it is re-evaluated per packet in gve_rx(). */
493 page_info->can_flip = 0;
495 /* It is possible that the networking stack has already
496 * finished processing all outstanding packets in the buffer
497 * and it can be reused.
498 * Flipping is unnecessary here - if the networking stack still
499 * owns half the page it is impossible to tell which half. Either
500 * the whole page is free or it needs to be replaced.
502 int recycle = gve_rx_can_recycle_buffer(page_info->page);
/* NOTE(review): an elided error check precedes this — a recycle
 * failure in QPL mode presumably schedules a device reset.
 */
505 if (!rx->data.raw_addressing)
506 gve_schedule_reset(priv);
510 /* We can't reuse the buffer - alloc a new one*/
511 union gve_rx_data_slot *data_slot =
512 &rx->data.data_ring[idx];
513 struct device *dev = &priv->pdev->dev;
515 gve_rx_free_buffer(dev, page_info, data_slot);
/* Clear the stale pointer so a failed realloc can't double-free. */
516 page_info->page = NULL;
517 if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot))
/* Publish the new producer count only after all slots are valid. */
523 rx->fill_cnt = fill_cnt;
/* NAPI poll worker for one Rx ring: walk descriptors whose sequence
 * number matches the expected value, process each via gve_rx(), update
 * stats, restock ring slots (inline for QPL, threshold-driven refill
 * for raw addressing), and ring the doorbell.  Returns whether more
 * work is pending.
 * NOTE(review): several lines are elided in this listing — the 'cnt'
 * and 'bytes' declarations, loop-counter increments, early returns,
 * and the rbytes stats update.
 */
527 bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
528 netdev_features_t feat)
530 struct gve_priv *priv = rx->gve;
531 u32 work_done = 0, packets = 0;
532 struct gve_rx_desc *desc;
534 u32 idx = cnt & rx->mask;
537 desc = rx->desc.desc_ring + idx;
/* A descriptor is owned by the driver once its seqno matches ours. */
538 while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
539 work_done < budget) {
542 netif_info(priv, rx_status, priv->dev,
543 "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
544 rx->q_num, idx, desc, desc->flags_seq);
545 netif_info(priv, rx_status, priv->dev,
546 "[%d] seqno=%d rx->desc.seqno=%d\n",
547 rx->q_num, GVE_SEQNO(desc->flags_seq),
549 dropped = !gve_rx(rx, desc, feat, idx);
/* Count payload bytes only (device pad excluded). */
551 bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
555 idx = cnt & rx->mask;
556 desc = rx->desc.desc_ring + idx;
557 rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
/* Nothing processed and plenty of buffers still posted: bail early. */
561 if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
564 u64_stats_update_begin(&rx->statss);
565 rx->rpackets += packets;
567 u64_stats_update_end(&rx->statss);
570 /* restock ring slots */
571 if (!rx->data.raw_addressing) {
572 /* In QPL mode buffs are refilled as the desc are processed */
573 rx->fill_cnt += work_done;
574 } else if (rx->fill_cnt - cnt <= rx->db_threshold) {
575 /* In raw addressing mode buffs are only refilled if the avail
576 * falls below a threshold.
578 if (!gve_rx_refill_buffers(priv, rx))
581 /* If we were not able to completely refill buffers, we'll want
582 * to schedule this queue for work again to refill buffers.
584 if (rx->fill_cnt - cnt <= rx->db_threshold) {
585 gve_rx_write_doorbell(priv, rx);
590 gve_rx_write_doorbell(priv, rx);
591 return gve_rx_work_pending(rx);
594 bool gve_rx_poll(struct gve_notify_block *block, int budget)
596 struct gve_rx_ring *rx = block->rx;
597 netdev_features_t feat;
600 feat = block->napi.dev->features;
602 /* If budget is 0, do all the work */
607 repoll |= gve_clean_rx_done(rx, budget, feat);
609 repoll |= gve_rx_work_pending(rx);