// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/etherdevice.h>

static void gve_rx_free_buffer(struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        dma_addr_t dma = (dma_addr_t)(be64_to_cpu(data_slot->addr) &
                                      GVE_DATA_SLOT_ADDR_PAGE_MASK);

        gve_free_page(dev, page_info->page, dma, DMA_FROM_DEVICE);
}

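/* Release every posted Rx buffer: in raw addressing mode each per-slot
 * page is freed individually; in QPL mode the queue page list is handed
 * back instead. The page_info array is freed in both cases.
 */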
static void gve_rx_unfill_pages(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        if (rx->data.raw_addressing) {
                u32 slots = rx->mask + 1;
                int i;

                for (i = 0; i < slots; i++)
                        gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i],
                                           &rx->data.data_ring[i]);
        } else {
                gve_unassign_qpl(priv, rx->data.qpl->id);
                rx->data.qpl = NULL;
        }
        kvfree(rx->data.page_info);
        rx->data.page_info = NULL;
}

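/* Tear down one Rx ring: detach it from its NAPI block, then free the
 * descriptor ring, the queue resources, the posted buffers and the data
 * slot ring.
 */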
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *dev = &priv->pdev->dev;
        u32 slots = rx->mask + 1;
        size_t bytes;

        gve_rx_remove_from_block(priv, idx);

        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
        rx->desc.desc_ring = NULL;

        dma_free_coherent(dev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;

        gve_rx_unfill_pages(priv, rx);

        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(dev, bytes, rx->data.data_ring,
                          rx->data.data_bus);
        rx->data.data_ring = NULL;
        netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
                             dma_addr_t addr, struct page *page, __be64 *slot_addr)
{
        page_info->page = page;
        page_info->page_offset = 0;
        page_info->page_address = page_address(page);
        *slot_addr = cpu_to_be64(addr);
}

static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev,
                               struct gve_rx_slot_page_info *page_info,
                               union gve_rx_data_slot *data_slot)
{
        struct page *page;
        dma_addr_t dma;
        int err;

        err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE);
        if (err)
                return err;

        gve_setup_rx_buffer(page_info, dma, page, &data_slot->addr);
        return 0;
}

static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
        struct gve_priv *priv = rx->gve;
        u32 slots;
        int err;
        int i;

        /* Allocate one page per Rx queue slot. Each page is split into two
         * packet buffers; when possible we "page flip" between the two.
         */
        slots = rx->mask + 1;

        rx->data.page_info = kvzalloc(slots *
                                      sizeof(*rx->data.page_info), GFP_KERNEL);
        if (!rx->data.page_info)
                return -ENOMEM;

        if (!rx->data.raw_addressing) {
                rx->data.qpl = gve_assign_rx_qpl(priv);
                if (!rx->data.qpl) {
                        kvfree(rx->data.page_info);
                        rx->data.page_info = NULL;
                        return -ENOMEM;
                }
        }
        for (i = 0; i < slots; i++) {
                if (!rx->data.raw_addressing) {
                        struct page *page = rx->data.qpl->pages[i];
                        dma_addr_t addr = i * PAGE_SIZE;

                        gve_setup_rx_buffer(&rx->data.page_info[i], addr, page,
                                            &rx->data.data_ring[i].qpl_offset);
                        continue;
                }
                err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i],
                                          &rx->data.data_ring[i]);
                if (err)
                        goto alloc_err;
        }

        return slots;
alloc_err:
        while (i--)
                gve_rx_free_buffer(&priv->pdev->dev,
                                   &rx->data.page_info[i],
                                   &rx->data.data_ring[i]);
        return err;
}

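/* Set up one Rx ring: allocate the data slot ring, post buffers for every
 * slot, then allocate the queue resources and the descriptor ring. On
 * failure, unwind in reverse order through the abort_* labels.
 */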
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
        struct gve_rx_ring *rx = &priv->rx[idx];
        struct device *hdev = &priv->pdev->dev;
        u32 slots, npages;
        int filled_pages;
        size_t bytes;
        int err;

        netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
        /* Make sure everything is zeroed to start with */
        memset(rx, 0, sizeof(*rx));

        rx->gve = priv;
        rx->q_num = idx;

        slots = priv->rx_data_slot_cnt;
        rx->mask = slots - 1;
        rx->data.raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;

        /* alloc rx data ring */
        bytes = sizeof(*rx->data.data_ring) * slots;
        rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
                                                &rx->data.data_bus,
                                                GFP_KERNEL);
        if (!rx->data.data_ring)
                return -ENOMEM;
        filled_pages = gve_prefill_rx_pages(rx);
        if (filled_pages < 0) {
                err = -ENOMEM;
                goto abort_with_slots;
        }
        rx->fill_cnt = filled_pages;
        /* Ensure data ring slots (packet buffers) are visible. */
        dma_wmb();

        /* Alloc gve_queue_resources */
        rx->q_resources =
                dma_alloc_coherent(hdev,
                                   sizeof(*rx->q_resources),
                                   &rx->q_resources_bus,
                                   GFP_KERNEL);
        if (!rx->q_resources) {
                err = -ENOMEM;
                goto abort_filled;
        }
        netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
                  (unsigned long)rx->data.data_bus);

        /* alloc rx desc ring */
        bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
        npages = bytes / PAGE_SIZE;
        if (npages * PAGE_SIZE != bytes) {
                err = -EIO;
                goto abort_with_q_resources;
        }

        rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
                                                GFP_KERNEL);
        if (!rx->desc.desc_ring) {
                err = -ENOMEM;
                goto abort_with_q_resources;
        }
        rx->cnt = 0;
        rx->db_threshold = priv->rx_desc_cnt / 2;
        rx->desc.seqno = 1;
        gve_rx_add_to_block(priv, idx);

        return 0;

abort_with_q_resources:
        dma_free_coherent(hdev, sizeof(*rx->q_resources),
                          rx->q_resources, rx->q_resources_bus);
        rx->q_resources = NULL;
abort_filled:
        gve_rx_unfill_pages(priv, rx);
abort_with_slots:
        bytes = sizeof(*rx->data.data_ring) * slots;
        dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
        rx->data.data_ring = NULL;

        return err;
}

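/* Allocate every configured Rx ring; if any allocation fails, free the
 * rings that were already set up and return the error.
 */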
int gve_rx_alloc_rings(struct gve_priv *priv)
{
        int err = 0;
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++) {
                err = gve_rx_alloc_ring(priv, i);
                if (err) {
                        netif_err(priv, drv, priv->dev,
                                  "Failed to alloc rx ring=%d: err=%d\n",
                                  i, err);
                        break;
                }
        }
        /* Free any rings that were allocated before the error */
        if (err) {
                int j;

                for (j = 0; j < i; j++)
                        gve_rx_free_ring(priv, j);
        }
        return err;
}

void gve_rx_free_rings_gqi(struct gve_priv *priv)
{
        int i;

        for (i = 0; i < priv->rx_cfg.num_queues; i++)
                gve_rx_free_ring(priv, i);
}

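/* Ring this queue's Rx doorbell by writing the current fill count
 * (big-endian) to its slot in the doorbell BAR.
 */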
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

        iowrite32be(rx->fill_cnt, &priv->db_bar2[db_idx]);
}

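/* Map descriptor flags to a kernel hash type: L4 for TCP/UDP, L3 for bare
 * IPv4/IPv6, L2 otherwise.
 */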
static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
        if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
                return PKT_HASH_TYPE_L4;
        if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
                return PKT_HASH_TYPE_L3;
        return PKT_HASH_TYPE_L2;
}

static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi,
                                        struct gve_rx_slot_page_info *page_info,
                                        u16 len)
{
        struct sk_buff *skb = napi_get_frags(napi);

        if (unlikely(!skb))
                return NULL;

        skb_add_rx_frag(skb, 0, page_info->page,
                        page_info->page_offset +
                        GVE_RX_PAD, len, PAGE_SIZE / 2);

        return skb;
}

static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
        const __be64 offset = cpu_to_be64(PAGE_SIZE / 2);

        /* "flip" to other packet buffer on this page */
        page_info->page_offset ^= PAGE_SIZE / 2;
        *(slot_addr) ^= offset;
}

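/* Buffers can only be page flipped on 4K pages, and only when an
 * MTU-sized frame plus pad and Ethernet header fits in half a page.
 */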
static bool gve_rx_can_flip_buffers(struct net_device *netdev)
{
        return PAGE_SIZE == 4096
                ? netdev->mtu + GVE_RX_PAD + ETH_HLEN <= PAGE_SIZE / 2 : false;
}

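/* A page count of 1 means only the driver holds the page, so it can be
 * recycled; 2 or more means the stack still holds a reference. Anything
 * below 1 is a bug and is reported as -1.
 */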
static int gve_rx_can_recycle_buffer(struct page *page)
{
        int pagecount = page_count(page);

        /* This page is not being used by any SKBs - reuse */
        if (pagecount == 1)
                return 1;
        /* This page is still being used by an SKB - we can't reuse */
        else if (pagecount >= 2)
                return 0;
        WARN(pagecount < 1, "Pagecount should never be < 1");
        return -1;
}

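/* Raw addressing (RDA) receive: attach the buffer to the skb as a page
 * fragment and take an extra page reference so the refill path can decide
 * later whether the page may be reused.
 */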
static struct sk_buff *
gve_rx_raw_addressing(struct device *dev, struct net_device *netdev,
                      struct gve_rx_slot_page_info *page_info, u16 len,
                      struct napi_struct *napi,
                      union gve_rx_data_slot *data_slot)
{
        struct sk_buff *skb;

        skb = gve_rx_add_frags(napi, page_info, len);
        if (!skb)
                return NULL;

        /* Optimistically stop the kernel from freeing the page by increasing
         * the page bias. We will check the refcount in refill to determine if
         * we need to alloc a new page.
         */
        get_page(page_info->page);

        return skb;
}

static struct sk_buff *
gve_rx_qpl(struct device *dev, struct net_device *netdev,
           struct gve_rx_ring *rx, struct gve_rx_slot_page_info *page_info,
           u16 len, struct napi_struct *napi,
           union gve_rx_data_slot *data_slot)
{
        struct sk_buff *skb;

        /* If raw_addressing mode is not enabled, gvnic can only receive into
         * registered segments. If the buffer can't be recycled, our only
         * choice is to copy the data out of it so that we can return it to
         * the device.
         */
        if (page_info->can_flip) {
                skb = gve_rx_add_frags(napi, page_info, len);
                /* No point in recycling if we didn't get the skb */
                if (skb) {
                        /* Make sure that the page isn't freed. */
                        get_page(page_info->page);
                        gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
                }
        } else {
                skb = gve_rx_copy(netdev, napi, page_info, len, GVE_RX_PAD);
                if (skb) {
                        u64_stats_update_begin(&rx->statss);
                        rx->rx_copied_pkt++;
                        u64_stats_update_end(&rx->statss);
                }
        }
        return skb;
}

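/* Process a single Rx descriptor. Packets up to rx_copybreak are copied
 * into a freshly allocated skb; larger packets are attached as page
 * fragments, flipping or recycling the buffer when possible. Returns
 * false if the packet was dropped.
 */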
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
                   netdev_features_t feat, u32 idx)
{
        struct gve_rx_slot_page_info *page_info;
        struct gve_priv *priv = rx->gve;
        struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
        struct net_device *dev = priv->dev;
        union gve_rx_data_slot *data_slot;
        struct sk_buff *skb = NULL;
        dma_addr_t page_bus;
        u16 len;

        /* drop this packet */
        if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_desc_err_dropped_pkt++;
                u64_stats_update_end(&rx->statss);
                return false;
        }

        len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
        page_info = &rx->data.page_info[idx];

        data_slot = &rx->data.data_ring[idx];
        page_bus = (rx->data.raw_addressing) ?
                        be64_to_cpu(data_slot->addr) & GVE_DATA_SLOT_ADDR_PAGE_MASK :
                        rx->data.qpl->page_buses[idx];
        dma_sync_single_for_cpu(&priv->pdev->dev, page_bus,
                                PAGE_SIZE, DMA_FROM_DEVICE);

        if (len <= priv->rx_copybreak) {
                /* Just copy small packets */
                skb = gve_rx_copy(dev, napi, page_info, len, GVE_RX_PAD);
                u64_stats_update_begin(&rx->statss);
                rx->rx_copied_pkt++;
                rx->rx_copybreak_pkt++;
                u64_stats_update_end(&rx->statss);
        } else {
                u8 can_flip = gve_rx_can_flip_buffers(dev);
                int recycle = 0;

                if (can_flip) {
                        recycle = gve_rx_can_recycle_buffer(page_info->page);
                        if (recycle < 0) {
                                if (!rx->data.raw_addressing)
                                        gve_schedule_reset(priv);
                                return false;
                        }
                }

                page_info->can_flip = can_flip && recycle;
                if (rx->data.raw_addressing) {
                        skb = gve_rx_raw_addressing(&priv->pdev->dev, dev,
                                                    page_info, len, napi,
                                                    data_slot);
                } else {
                        skb = gve_rx_qpl(&priv->pdev->dev, dev, rx,
                                         page_info, len, napi, data_slot);
                }
        }

        if (!skb) {
                u64_stats_update_begin(&rx->statss);
                rx->rx_skb_alloc_fail++;
                u64_stats_update_end(&rx->statss);
                return false;
        }

        if (likely(feat & NETIF_F_RXCSUM)) {
                /* NIC passes up the partial sum */
                if (rx_desc->csum)
                        skb->ip_summed = CHECKSUM_COMPLETE;
                else
                        skb->ip_summed = CHECKSUM_NONE;
                skb->csum = csum_unfold(rx_desc->csum);
        }

        /* parse flags & pass relevant info up */
        if (likely(feat & NETIF_F_RXHASH) &&
            gve_needs_rss(rx_desc->flags_seq))
                skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
                             gve_rss_type(rx_desc->flags_seq));

        if (skb_is_nonlinear(skb))
                napi_gro_frags(napi);
        else
                napi_gro_receive(napi, skb);
        return true;
}

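/* A descriptor is ready once its sequence number matches the sequence
 * number the ring expects next.
 */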
static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
        struct gve_rx_desc *desc;
        __be16 flags_seq;
        u32 next_idx;

        next_idx = rx->cnt & rx->mask;
        desc = rx->desc.desc_ring + next_idx;

        flags_seq = desc->flags_seq;
        /* Make sure we have synchronized the seq no with the device */
        smp_rmb();

        return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

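/* Repost consumed buffers until the ring is full again: flip to the free
 * half of a page, reuse a page the stack has released, or free and
 * allocate a replacement. Returns false if a bad page refcount was
 * detected.
 */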
static bool gve_rx_refill_buffers(struct gve_priv *priv, struct gve_rx_ring *rx)
{
        int refill_target = rx->mask + 1;
        u32 fill_cnt = rx->fill_cnt;

        while (fill_cnt - rx->cnt < refill_target) {
                struct gve_rx_slot_page_info *page_info;
                u32 idx = fill_cnt & rx->mask;

                page_info = &rx->data.page_info[idx];
                if (page_info->can_flip) {
                        /* The other half of the page is free because it was
                         * free when we processed the descriptor. Flip to it.
                         */
                        union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];

                        gve_rx_flip_buff(page_info, &data_slot->addr);
                        page_info->can_flip = 0;
                } else {
                        /* It is possible that the networking stack has already
                         * finished processing all outstanding packets in the buffer
                         * and it can be reused.
                         * Flipping is unnecessary here - if the networking stack still
                         * owns half the page it is impossible to tell which half. Either
                         * the whole page is free or it needs to be replaced.
                         */
                        int recycle = gve_rx_can_recycle_buffer(page_info->page);

                        if (recycle < 0) {
                                if (!rx->data.raw_addressing)
                                        gve_schedule_reset(priv);
                                return false;
                        }
                        if (!recycle) {
                                /* We can't reuse the buffer - alloc a new one */
                                union gve_rx_data_slot *data_slot =
                                                &rx->data.data_ring[idx];
                                struct device *dev = &priv->pdev->dev;

                                gve_rx_free_buffer(dev, page_info, data_slot);
                                page_info->page = NULL;
                                if (gve_rx_alloc_buffer(priv, dev, page_info, data_slot))
                                        break;
                        }
                }
                fill_cnt++;
        }
        rx->fill_cnt = fill_cnt;
        return true;
}

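/* Main Rx service loop: process in-sequence descriptors up to @budget,
 * update stats, restock ring slots and ring the doorbell. Returns true if
 * the queue should be polled again.
 */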
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
                       netdev_features_t feat)
{
        struct gve_priv *priv = rx->gve;
        u32 work_done = 0, packets = 0;
        struct gve_rx_desc *desc;
        u32 cnt = rx->cnt;
        u32 idx = cnt & rx->mask;
        u64 bytes = 0;

        desc = rx->desc.desc_ring + idx;
        while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
               work_done < budget) {
                bool dropped;

                netif_info(priv, rx_status, priv->dev,
                           "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
                           rx->q_num, idx, desc, desc->flags_seq);
                netif_info(priv, rx_status, priv->dev,
                           "[%d] seqno=%d rx->desc.seqno=%d\n",
                           rx->q_num, GVE_SEQNO(desc->flags_seq),
                           rx->desc.seqno);
                dropped = !gve_rx(rx, desc, feat, idx);
                if (!dropped) {
                        bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
                        packets++;
                }
                cnt++;
                idx = cnt & rx->mask;
                desc = rx->desc.desc_ring + idx;
                rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
                work_done++;
        }

        if (!work_done && rx->fill_cnt - cnt > rx->db_threshold)
                return false;

        u64_stats_update_begin(&rx->statss);
        rx->rpackets += packets;
        rx->rbytes += bytes;
        u64_stats_update_end(&rx->statss);
        rx->cnt = cnt;

        /* restock ring slots */
        if (!rx->data.raw_addressing) {
                /* In QPL mode buffers are refilled as the descriptors are processed */
                rx->fill_cnt += work_done;
        } else if (rx->fill_cnt - cnt <= rx->db_threshold) {
                /* In raw addressing mode buffers are only refilled if the
                 * available count falls below a threshold.
                 */
                if (!gve_rx_refill_buffers(priv, rx))
                        return false;

                /* If we were not able to completely refill buffers, we'll want
                 * to schedule this queue for work again to refill buffers.
                 */
                if (rx->fill_cnt - cnt <= rx->db_threshold) {
                        gve_rx_write_doorbell(priv, rx);
                        return true;
                }
        }

        gve_rx_write_doorbell(priv, rx);
        return gve_rx_work_pending(rx);
}

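/* NAPI poll entry point for an Rx queue, invoked from the driver's NAPI
 * handler. A budget of 0 means "do all the work"; a negative budget only
 * checks whether work is pending.
 */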
bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
        struct gve_rx_ring *rx = block->rx;
        netdev_features_t feat;
        bool repoll = false;

        feat = block->napi.dev->features;

        /* If budget is 0, do all the work */
        if (budget == 0)
                budget = INT_MAX;

        if (budget > 0)
                repoll |= gve_clean_rx_done(rx, budget, feat);
        else
                repoll |= gve_rx_work_pending(rx);
        return repoll;
}