gve: Use size_add() in call to struct_size()
[platform/kernel/linux-starfive.git] / drivers/net/ethernet/google/gve/gve_main.c
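
For context, here is a minimal sketch of the change named in the subject line. size_add() and struct_size() are the saturating size helpers from <linux/overflow.h>; the "before" form is inferred from the subject and is not shown on this page, while the "after" form appears in gve_alloc_stats_report() below.

        /* before (inferred): a plain int sum may wrap before struct_size() sees it */
        priv->stats_report_len = struct_size(priv->stats_report, stats,
                                             tx_stats_num + rx_stats_num);

        /* after: size_add() saturates at SIZE_MAX on overflow, so struct_size()
         * is never handed a wrapped count and the report buffer cannot be
         * undersized
         */
        priv->stats_report_len = struct_size(priv->stats_report, stats,
                                             size_add(tx_stats_num, rx_stats_num));
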
1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2021 Google, Inc.
5  */
6
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/sched.h>
15 #include <linux/timer.h>
16 #include <linux/workqueue.h>
17 #include <linux/utsname.h>
18 #include <linux/version.h>
19 #include <net/sch_generic.h>
20 #include <net/xdp_sock_drv.h>
21 #include "gve.h"
22 #include "gve_dqo.h"
23 #include "gve_adminq.h"
24 #include "gve_register.h"
25
26 #define GVE_DEFAULT_RX_COPYBREAK        (256)
27
28 #define DEFAULT_MSG_LEVEL       (NETIF_MSG_DRV | NETIF_MSG_LINK)
29 #define GVE_VERSION             "1.0.0"
30 #define GVE_VERSION_PREFIX      "GVE-"
31
32 // Minimum amount of time between queue kicks in msec (10 seconds)
33 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
34
35 char gve_driver_name[] = "gve";
36 const char gve_version_str[] = GVE_VERSION;
37 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
38
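/* Report the driver's OS type, version strings and capability flags to the
 * device over the admin queue; a device that does not implement this command
 * (-EOPNOTSUPP) is tolerated.
 */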
39 static int gve_verify_driver_compatibility(struct gve_priv *priv)
40 {
41         int err;
42         struct gve_driver_info *driver_info;
43         dma_addr_t driver_info_bus;
44
45         driver_info = dma_alloc_coherent(&priv->pdev->dev,
46                                          sizeof(struct gve_driver_info),
47                                          &driver_info_bus, GFP_KERNEL);
48         if (!driver_info)
49                 return -ENOMEM;
50
51         *driver_info = (struct gve_driver_info) {
52                 .os_type = 1, /* Linux */
53                 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
54                 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
55                 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
56                 .driver_capability_flags = {
57                         cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
58                         cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
59                         cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
60                         cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
61                 },
62         };
63         strscpy(driver_info->os_version_str1, utsname()->release,
64                 sizeof(driver_info->os_version_str1));
65         strscpy(driver_info->os_version_str2, utsname()->version,
66                 sizeof(driver_info->os_version_str2));
67
68         err = gve_adminq_verify_driver_compatibility(priv,
69                                                      sizeof(struct gve_driver_info),
70                                                      driver_info_bus);
71
72         /* It's ok if the device doesn't support this */
73         if (err == -EOPNOTSUPP)
74                 err = 0;
75
76         dma_free_coherent(&priv->pdev->dev,
77                           sizeof(struct gve_driver_info),
78                           driver_info, driver_info_bus);
79         return err;
80 }
81
82 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
83 {
84         struct gve_priv *priv = netdev_priv(dev);
85
86         if (gve_is_gqi(priv))
87                 return gve_tx(skb, dev);
88         else
89                 return gve_tx_dqo(skb, dev);
90 }
91
92 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
93 {
94         struct gve_priv *priv = netdev_priv(dev);
95         unsigned int start;
96         u64 packets, bytes;
97         int num_tx_queues;
98         int ring;
99
100         num_tx_queues = gve_num_tx_queues(priv);
101         if (priv->rx) {
102                 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
103                         do {
104                                 start =
105                                   u64_stats_fetch_begin(&priv->rx[ring].statss);
106                                 packets = priv->rx[ring].rpackets;
107                                 bytes = priv->rx[ring].rbytes;
108                         } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
109                                                        start));
110                         s->rx_packets += packets;
111                         s->rx_bytes += bytes;
112                 }
113         }
114         if (priv->tx) {
115                 for (ring = 0; ring < num_tx_queues; ring++) {
116                         do {
117                                 start =
118                                   u64_stats_fetch_begin(&priv->tx[ring].statss);
119                                 packets = priv->tx[ring].pkt_done;
120                                 bytes = priv->tx[ring].bytes_done;
121                         } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
122                                                        start));
123                         s->tx_packets += packets;
124                         s->tx_bytes += bytes;
125                 }
126         }
127 }
128
129 static int gve_alloc_counter_array(struct gve_priv *priv)
130 {
131         priv->counter_array =
132                 dma_alloc_coherent(&priv->pdev->dev,
133                                    priv->num_event_counters *
134                                    sizeof(*priv->counter_array),
135                                    &priv->counter_array_bus, GFP_KERNEL);
136         if (!priv->counter_array)
137                 return -ENOMEM;
138
139         return 0;
140 }
141
142 static void gve_free_counter_array(struct gve_priv *priv)
143 {
144         if (!priv->counter_array)
145                 return;
146
147         dma_free_coherent(&priv->pdev->dev,
148                           priv->num_event_counters *
149                           sizeof(*priv->counter_array),
150                           priv->counter_array, priv->counter_array_bus);
151         priv->counter_array = NULL;
152 }
153
154 /* NIC requests to report stats */
155 static void gve_stats_report_task(struct work_struct *work)
156 {
157         struct gve_priv *priv = container_of(work, struct gve_priv,
158                                              stats_report_task);
159         if (gve_get_do_report_stats(priv)) {
160                 gve_handle_report_stats(priv);
161                 gve_clear_do_report_stats(priv);
162         }
163 }
164
165 static void gve_stats_report_schedule(struct gve_priv *priv)
166 {
167         if (!gve_get_probe_in_progress(priv) &&
168             !gve_get_reset_in_progress(priv)) {
169                 gve_set_do_report_stats(priv);
170                 queue_work(priv->gve_wq, &priv->stats_report_task);
171         }
172 }
173
174 static void gve_stats_report_timer(struct timer_list *t)
175 {
176         struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
177
178         mod_timer(&priv->stats_report_timer,
179                   round_jiffies(jiffies +
180                   msecs_to_jiffies(priv->stats_report_timer_period)));
181         gve_stats_report_schedule(priv);
182 }
183
184 static int gve_alloc_stats_report(struct gve_priv *priv)
185 {
186         int tx_stats_num, rx_stats_num;
187
188         tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
189                        gve_num_tx_queues(priv);
190         rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
191                        priv->rx_cfg.num_queues;
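        /* size_add() and struct_size() both saturate at SIZE_MAX on overflow,
         * so a wrapped stats count cannot lead to an undersized report buffer.
         */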
192         priv->stats_report_len = struct_size(priv->stats_report, stats,
193                                              size_add(tx_stats_num, rx_stats_num));
194         priv->stats_report =
195                 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
196                                    &priv->stats_report_bus, GFP_KERNEL);
197         if (!priv->stats_report)
198                 return -ENOMEM;
199         /* Set up timer for the report-stats task */
200         timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
201         priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
202         return 0;
203 }
204
205 static void gve_free_stats_report(struct gve_priv *priv)
206 {
207         if (!priv->stats_report)
208                 return;
209
210         del_timer_sync(&priv->stats_report_timer);
211         dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
212                           priv->stats_report, priv->stats_report_bus);
213         priv->stats_report = NULL;
214 }
215
216 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
217 {
218         struct gve_priv *priv = arg;
219
220         queue_work(priv->gve_wq, &priv->service_task);
221         return IRQ_HANDLED;
222 }
223
224 static irqreturn_t gve_intr(int irq, void *arg)
225 {
226         struct gve_notify_block *block = arg;
227         struct gve_priv *priv = block->priv;
228
229         iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
230         napi_schedule_irqoff(&block->napi);
231         return IRQ_HANDLED;
232 }
233
234 static irqreturn_t gve_intr_dqo(int irq, void *arg)
235 {
236         struct gve_notify_block *block = arg;
237
238         /* Interrupts are automatically masked */
239         napi_schedule_irqoff(&block->napi);
240         return IRQ_HANDLED;
241 }
242
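/* NAPI poll for GQI queues: service TX (or XDP TX) and RX work, then ack and
 * unmask the block's IRQ doorbell, re-polling if more work arrived meanwhile.
 */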
243 static int gve_napi_poll(struct napi_struct *napi, int budget)
244 {
245         struct gve_notify_block *block;
246         __be32 __iomem *irq_doorbell;
247         bool reschedule = false;
248         struct gve_priv *priv;
249         int work_done = 0;
250
251         block = container_of(napi, struct gve_notify_block, napi);
252         priv = block->priv;
253
254         if (block->tx) {
255                 if (block->tx->q_num < priv->tx_cfg.num_queues)
256                         reschedule |= gve_tx_poll(block, budget);
257                 else
258                         reschedule |= gve_xdp_poll(block, budget);
259         }
260
261         if (block->rx) {
262                 work_done = gve_rx_poll(block, budget);
263                 reschedule |= work_done == budget;
264         }
265
266         if (reschedule)
267                 return budget;
268
269         /* Complete processing - don't unmask irq if busy polling is enabled */
270         if (likely(napi_complete_done(napi, work_done))) {
271                 irq_doorbell = gve_irq_doorbell(priv, block);
272                 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
273
274                 /* Ensure IRQ ACK is visible before we check pending work.
275                  * If the queue has issued updates since then, they will be visible now.
276                  */
277                 mb();
278
279                 if (block->tx)
280                         reschedule |= gve_tx_clean_pending(priv, block->tx);
281                 if (block->rx)
282                         reschedule |= gve_rx_work_pending(block->rx);
283
284                 if (reschedule && napi_reschedule(napi))
285                         iowrite32be(GVE_IRQ_MASK, irq_doorbell);
286         }
287         return work_done;
288 }
289
290 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
291 {
292         struct gve_notify_block *block =
293                 container_of(napi, struct gve_notify_block, napi);
294         struct gve_priv *priv = block->priv;
295         bool reschedule = false;
296         int work_done = 0;
297
298         if (block->tx)
299                 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
300
301         if (block->rx) {
302                 work_done = gve_rx_poll_dqo(block, budget);
303                 reschedule |= work_done == budget;
304         }
305
306         if (reschedule)
307                 return budget;
308
309         if (likely(napi_complete_done(napi, work_done))) {
310                 /* Enable interrupts again.
311                  *
312                  * We don't need to repoll afterwards because HW supports the
313                  * PCI MSI-X PBA feature.
314                  *
315                  * Another interrupt would be triggered if a new event came in
316                  * since the last one.
317                  */
318                 gve_write_irq_doorbell_dqo(priv, block,
319                                            GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
320         }
321
322         return work_done;
323 }
324
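/* Allocate one MSI-X vector per notification block plus one management vector,
 * request their IRQs and spread block IRQ affinity across online CPUs. If
 * fewer vectors are granted than requested, the TX/RX max queue counts are
 * scaled down to fit.
 */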
325 static int gve_alloc_notify_blocks(struct gve_priv *priv)
326 {
327         int num_vecs_requested = priv->num_ntfy_blks + 1;
328         unsigned int active_cpus;
329         int vecs_enabled;
330         int i, j;
331         int err;
332
333         priv->msix_vectors = kvcalloc(num_vecs_requested,
334                                       sizeof(*priv->msix_vectors), GFP_KERNEL);
335         if (!priv->msix_vectors)
336                 return -ENOMEM;
337         for (i = 0; i < num_vecs_requested; i++)
338                 priv->msix_vectors[i].entry = i;
339         vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
340                                              GVE_MIN_MSIX, num_vecs_requested);
341         if (vecs_enabled < 0) {
342                 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
343                         GVE_MIN_MSIX, vecs_enabled);
344                 err = vecs_enabled;
345                 goto abort_with_msix_vectors;
346         }
347         if (vecs_enabled != num_vecs_requested) {
348                 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
349                 int vecs_per_type = new_num_ntfy_blks / 2;
350                 int vecs_left = new_num_ntfy_blks % 2;
351
352                 priv->num_ntfy_blks = new_num_ntfy_blks;
353                 priv->mgmt_msix_idx = priv->num_ntfy_blks;
354                 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
355                                                 vecs_per_type);
356                 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
357                                                 vecs_per_type + vecs_left);
358                 dev_err(&priv->pdev->dev,
359                         "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
360                         vecs_enabled, priv->tx_cfg.max_queues,
361                         priv->rx_cfg.max_queues);
362                 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
363                         priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
364                 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
365                         priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
366         }
367         /* Half the notification blocks go to TX and half to RX */
368         active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
369
370         /* Setup Management Vector  - the last vector */
371         snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
372                  pci_name(priv->pdev));
373         err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
374                           gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
375         if (err) {
376                 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
377                 goto abort_with_msix_enabled;
378         }
379         priv->irq_db_indices =
380                 dma_alloc_coherent(&priv->pdev->dev,
381                                    priv->num_ntfy_blks *
382                                    sizeof(*priv->irq_db_indices),
383                                    &priv->irq_db_indices_bus, GFP_KERNEL);
384         if (!priv->irq_db_indices) {
385                 err = -ENOMEM;
386                 goto abort_with_mgmt_vector;
387         }
388
389         priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
390                                      sizeof(*priv->ntfy_blocks), GFP_KERNEL);
391         if (!priv->ntfy_blocks) {
392                 err = -ENOMEM;
393                 goto abort_with_irq_db_indices;
394         }
395
396         /* Setup the other blocks - the first n-1 vectors */
397         for (i = 0; i < priv->num_ntfy_blks; i++) {
398                 struct gve_notify_block *block = &priv->ntfy_blocks[i];
399                 int msix_idx = i;
400
401                 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
402                          i, pci_name(priv->pdev));
403                 block->priv = priv;
404                 err = request_irq(priv->msix_vectors[msix_idx].vector,
405                                   gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
406                                   0, block->name, block);
407                 if (err) {
408                         dev_err(&priv->pdev->dev,
409                                 "Failed to receive msix vector %d\n", i);
410                         goto abort_with_some_ntfy_blocks;
411                 }
412                 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
413                                       get_cpu_mask(i % active_cpus));
414                 block->irq_db_index = &priv->irq_db_indices[i].index;
415         }
416         return 0;
417 abort_with_some_ntfy_blocks:
418         for (j = 0; j < i; j++) {
419                 struct gve_notify_block *block = &priv->ntfy_blocks[j];
420                 int msix_idx = j;
421
422                 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
423                                       NULL);
424                 free_irq(priv->msix_vectors[msix_idx].vector, block);
425         }
426         kvfree(priv->ntfy_blocks);
427         priv->ntfy_blocks = NULL;
428 abort_with_irq_db_indices:
429         dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
430                           sizeof(*priv->irq_db_indices),
431                           priv->irq_db_indices, priv->irq_db_indices_bus);
432         priv->irq_db_indices = NULL;
433 abort_with_mgmt_vector:
434         free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
435 abort_with_msix_enabled:
436         pci_disable_msix(priv->pdev);
437 abort_with_msix_vectors:
438         kvfree(priv->msix_vectors);
439         priv->msix_vectors = NULL;
440         return err;
441 }
442
443 static void gve_free_notify_blocks(struct gve_priv *priv)
444 {
445         int i;
446
447         if (!priv->msix_vectors)
448                 return;
449
450         /* Free the irqs */
451         for (i = 0; i < priv->num_ntfy_blks; i++) {
452                 struct gve_notify_block *block = &priv->ntfy_blocks[i];
453                 int msix_idx = i;
454
455                 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
456                                       NULL);
457                 free_irq(priv->msix_vectors[msix_idx].vector, block);
458         }
459         free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
460         kvfree(priv->ntfy_blocks);
461         priv->ntfy_blocks = NULL;
462         dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
463                           sizeof(*priv->irq_db_indices),
464                           priv->irq_db_indices, priv->irq_db_indices_bus);
465         priv->irq_db_indices = NULL;
466         pci_disable_msix(priv->pdev);
467         kvfree(priv->msix_vectors);
468         priv->msix_vectors = NULL;
469 }
470
471 static int gve_setup_device_resources(struct gve_priv *priv)
472 {
473         int err;
474
475         err = gve_alloc_counter_array(priv);
476         if (err)
477                 return err;
478         err = gve_alloc_notify_blocks(priv);
479         if (err)
480                 goto abort_with_counter;
481         err = gve_alloc_stats_report(priv);
482         if (err)
483                 goto abort_with_ntfy_blocks;
484         err = gve_adminq_configure_device_resources(priv,
485                                                     priv->counter_array_bus,
486                                                     priv->num_event_counters,
487                                                     priv->irq_db_indices_bus,
488                                                     priv->num_ntfy_blks);
489         if (unlikely(err)) {
490                 dev_err(&priv->pdev->dev,
491                         "could not setup device_resources: err=%d\n", err);
492                 err = -ENXIO;
493                 goto abort_with_stats_report;
494         }
495
496         if (!gve_is_gqi(priv)) {
497                 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
498                                                GFP_KERNEL);
499                 if (!priv->ptype_lut_dqo) {
500                         err = -ENOMEM;
501                         goto abort_with_stats_report;
502                 }
503                 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
504                 if (err) {
505                         dev_err(&priv->pdev->dev,
506                                 "Failed to get ptype map: err=%d\n", err);
507                         goto abort_with_ptype_lut;
508                 }
509         }
510
511         err = gve_adminq_report_stats(priv, priv->stats_report_len,
512                                       priv->stats_report_bus,
513                                       GVE_STATS_REPORT_TIMER_PERIOD);
514         if (err)
515                 dev_err(&priv->pdev->dev,
516                         "Failed to report stats: err=%d\n", err);
517         gve_set_device_resources_ok(priv);
518         return 0;
519
520 abort_with_ptype_lut:
521         kvfree(priv->ptype_lut_dqo);
522         priv->ptype_lut_dqo = NULL;
523 abort_with_stats_report:
524         gve_free_stats_report(priv);
525 abort_with_ntfy_blocks:
526         gve_free_notify_blocks(priv);
527 abort_with_counter:
528         gve_free_counter_array(priv);
529
530         return err;
531 }
532
533 static void gve_trigger_reset(struct gve_priv *priv);
534
535 static void gve_teardown_device_resources(struct gve_priv *priv)
536 {
537         int err;
538
539         /* Tell device its resources are being freed */
540         if (gve_get_device_resources_ok(priv)) {
541                 /* detach the stats report */
542                 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
543                 if (err) {
544                         dev_err(&priv->pdev->dev,
545                                 "Failed to detach stats report: err=%d\n", err);
546                         gve_trigger_reset(priv);
547                 }
548                 err = gve_adminq_deconfigure_device_resources(priv);
549                 if (err) {
550                         dev_err(&priv->pdev->dev,
551                                 "Could not deconfigure device resources: err=%d\n",
552                                 err);
553                         gve_trigger_reset(priv);
554                 }
555         }
556
557         kvfree(priv->ptype_lut_dqo);
558         priv->ptype_lut_dqo = NULL;
559
560         gve_free_counter_array(priv);
561         gve_free_notify_blocks(priv);
562         gve_free_stats_report(priv);
563         gve_clear_device_resources_ok(priv);
564 }
565
566 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
567                          int (*gve_poll)(struct napi_struct *, int))
568 {
569         struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
570
571         netif_napi_add(priv->dev, &block->napi, gve_poll);
572 }
573
574 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
575 {
576         struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
577
578         netif_napi_del(&block->napi);
579 }
580
581 static int gve_register_xdp_qpls(struct gve_priv *priv)
582 {
583         int start_id;
584         int err;
585         int i;
586
587         start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
588         for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
589                 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
590                 if (err) {
591                         netif_err(priv, drv, priv->dev,
592                                   "failed to register queue page list %d\n",
593                                   priv->qpls[i].id);
594                         /* This failure will trigger a reset - no need to clean
595                          * up
596                          */
597                         return err;
598                 }
599         }
600         return 0;
601 }
602
603 static int gve_register_qpls(struct gve_priv *priv)
604 {
605         int start_id;
606         int err;
607         int i;
608
609         start_id = gve_tx_start_qpl_id(priv);
610         for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
611                 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
612                 if (err) {
613                         netif_err(priv, drv, priv->dev,
614                                   "failed to register queue page list %d\n",
615                                   priv->qpls[i].id);
616                         /* This failure will trigger a reset - no need to clean
617                          * up
618                          */
619                         return err;
620                 }
621         }
622
623         start_id = gve_rx_start_qpl_id(priv);
624         for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
625                 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
626                 if (err) {
627                         netif_err(priv, drv, priv->dev,
628                                   "failed to register queue page list %d\n",
629                                   priv->qpls[i].id);
630                         /* This failure will trigger a reset - no need to clean
631                          * up
632                          */
633                         return err;
634                 }
635         }
636         return 0;
637 }
638
639 static int gve_unregister_xdp_qpls(struct gve_priv *priv)
640 {
641         int start_id;
642         int err;
643         int i;
644
645         start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
646         for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
647                 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
648                 /* This failure will trigger a reset - no need to clean up */
649                 if (err) {
650                         netif_err(priv, drv, priv->dev,
651                                   "Failed to unregister queue page list %d\n",
652                                   priv->qpls[i].id);
653                         return err;
654                 }
655         }
656         return 0;
657 }
658
659 static int gve_unregister_qpls(struct gve_priv *priv)
660 {
661         int start_id;
662         int err;
663         int i;
664
665         start_id = gve_tx_start_qpl_id(priv);
666         for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
667                 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
668                 /* This failure will trigger a reset - no need to clean up */
669                 if (err) {
670                         netif_err(priv, drv, priv->dev,
671                                   "Failed to unregister queue page list %d\n",
672                                   priv->qpls[i].id);
673                         return err;
674                 }
675         }
676
677         start_id = gve_rx_start_qpl_id(priv);
678         for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
679                 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
680                 /* This failure will trigger a reset - no need to clean up */
681                 if (err) {
682                         netif_err(priv, drv, priv->dev,
683                                   "Failed to unregister queue page list %d\n",
684                                   priv->qpls[i].id);
685                         return err;
686                 }
687         }
688         return 0;
689 }
690
691 static int gve_create_xdp_rings(struct gve_priv *priv)
692 {
693         int err;
694
695         err = gve_adminq_create_tx_queues(priv,
696                                           gve_xdp_tx_start_queue_id(priv),
697                                           priv->num_xdp_queues);
698         if (err) {
699                 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
700                           priv->num_xdp_queues);
701                 /* This failure will trigger a reset - no need to clean
702                  * up
703                  */
704                 return err;
705         }
706         netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
707                   priv->num_xdp_queues);
708
709         return 0;
710 }
711
712 static int gve_create_rings(struct gve_priv *priv)
713 {
714         int num_tx_queues = gve_num_tx_queues(priv);
715         int err;
716         int i;
717
718         err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
719         if (err) {
720                 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
721                           num_tx_queues);
722                 /* This failure will trigger a reset - no need to clean
723                  * up
724                  */
725                 return err;
726         }
727         netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
728                   num_tx_queues);
729
730         err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
731         if (err) {
732                 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
733                           priv->rx_cfg.num_queues);
734                 /* This failure will trigger a reset - no need to clean
735                  * up
736                  */
737                 return err;
738         }
739         netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
740                   priv->rx_cfg.num_queues);
741
742         if (gve_is_gqi(priv)) {
743                 /* Rx data ring has been prefilled with packet buffers at queue
744                  * allocation time.
745                  *
746                  * Write the doorbell to provide descriptor slots and packet
747                  * buffers to the NIC.
748                  */
749                 for (i = 0; i < priv->rx_cfg.num_queues; i++)
750                         gve_rx_write_doorbell(priv, &priv->rx[i]);
751         } else {
752                 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
753                         /* Post buffers and ring doorbell. */
754                         gve_rx_post_buffers_dqo(&priv->rx[i]);
755                 }
756         }
757
758         return 0;
759 }
760
761 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
762                                          int (*napi_poll)(struct napi_struct *napi,
763                                                           int budget))
764 {
765         int start_id = gve_xdp_tx_start_queue_id(priv);
766         int i;
767
768         /* Add xdp tx napi & init sync stats */
769         for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
770                 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
771
772                 u64_stats_init(&priv->tx[i].statss);
773                 priv->tx[i].ntfy_id = ntfy_idx;
774                 gve_add_napi(priv, ntfy_idx, napi_poll);
775         }
776 }
777
778 static void add_napi_init_sync_stats(struct gve_priv *priv,
779                                      int (*napi_poll)(struct napi_struct *napi,
780                                                       int budget))
781 {
782         int i;
783
784         /* Add tx napi & init sync stats */
785         for (i = 0; i < gve_num_tx_queues(priv); i++) {
786                 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
787
788                 u64_stats_init(&priv->tx[i].statss);
789                 priv->tx[i].ntfy_id = ntfy_idx;
790                 gve_add_napi(priv, ntfy_idx, napi_poll);
791         }
792         /* Add rx napi & init sync stats */
793         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
794                 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
795
796                 u64_stats_init(&priv->rx[i].statss);
797                 priv->rx[i].ntfy_id = ntfy_idx;
798                 gve_add_napi(priv, ntfy_idx, napi_poll);
799         }
800 }
801
802 static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
803 {
804         if (gve_is_gqi(priv)) {
805                 gve_tx_free_rings_gqi(priv, start_id, num_rings);
806         } else {
807                 gve_tx_free_rings_dqo(priv);
808         }
809 }
810
811 static int gve_alloc_xdp_rings(struct gve_priv *priv)
812 {
813         int start_id;
814         int err = 0;
815
816         if (!priv->num_xdp_queues)
817                 return 0;
818
819         start_id = gve_xdp_tx_start_queue_id(priv);
820         err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
821         if (err)
822                 return err;
823         add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
824
825         return 0;
826 }
827
828 static int gve_alloc_rings(struct gve_priv *priv)
829 {
830         int err;
831
832         /* Setup tx rings */
833         priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
834                             GFP_KERNEL);
835         if (!priv->tx)
836                 return -ENOMEM;
837
838         if (gve_is_gqi(priv))
839                 err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
840         else
841                 err = gve_tx_alloc_rings_dqo(priv);
842         if (err)
843                 goto free_tx;
844
845         /* Setup rx rings */
846         priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
847                             GFP_KERNEL);
848         if (!priv->rx) {
849                 err = -ENOMEM;
850                 goto free_tx_queue;
851         }
852
853         if (gve_is_gqi(priv))
854                 err = gve_rx_alloc_rings(priv);
855         else
856                 err = gve_rx_alloc_rings_dqo(priv);
857         if (err)
858                 goto free_rx;
859
860         if (gve_is_gqi(priv))
861                 add_napi_init_sync_stats(priv, gve_napi_poll);
862         else
863                 add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
864
865         return 0;
866
867 free_rx:
868         kvfree(priv->rx);
869         priv->rx = NULL;
870 free_tx_queue:
871         gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
872 free_tx:
873         kvfree(priv->tx);
874         priv->tx = NULL;
875         return err;
876 }
877
878 static int gve_destroy_xdp_rings(struct gve_priv *priv)
879 {
880         int start_id;
881         int err;
882
883         start_id = gve_xdp_tx_start_queue_id(priv);
884         err = gve_adminq_destroy_tx_queues(priv,
885                                            start_id,
886                                            priv->num_xdp_queues);
887         if (err) {
888                 netif_err(priv, drv, priv->dev,
889                           "failed to destroy XDP queues\n");
890                 /* This failure will trigger a reset - no need to clean up */
891                 return err;
892         }
893         netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
894
895         return 0;
896 }
897
898 static int gve_destroy_rings(struct gve_priv *priv)
899 {
900         int num_tx_queues = gve_num_tx_queues(priv);
901         int err;
902
903         err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
904         if (err) {
905                 netif_err(priv, drv, priv->dev,
906                           "failed to destroy tx queues\n");
907                 /* This failure will trigger a reset - no need to clean up */
908                 return err;
909         }
910         netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
911         err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
912         if (err) {
913                 netif_err(priv, drv, priv->dev,
914                           "failed to destroy rx queues\n");
915                 /* This failure will trigger a reset - no need to clean up */
916                 return err;
917         }
918         netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
919         return 0;
920 }
921
922 static void gve_rx_free_rings(struct gve_priv *priv)
923 {
924         if (gve_is_gqi(priv))
925                 gve_rx_free_rings_gqi(priv);
926         else
927                 gve_rx_free_rings_dqo(priv);
928 }
929
930 static void gve_free_xdp_rings(struct gve_priv *priv)
931 {
932         int ntfy_idx, start_id;
933         int i;
934
935         start_id = gve_xdp_tx_start_queue_id(priv);
936         if (priv->tx) {
937                 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
938                         ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
939                         gve_remove_napi(priv, ntfy_idx);
940                 }
941                 gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
942         }
943 }
944
945 static void gve_free_rings(struct gve_priv *priv)
946 {
947         int num_tx_queues = gve_num_tx_queues(priv);
948         int ntfy_idx;
949         int i;
950
951         if (priv->tx) {
952                 for (i = 0; i < num_tx_queues; i++) {
953                         ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
954                         gve_remove_napi(priv, ntfy_idx);
955                 }
956                 gve_tx_free_rings(priv, 0, num_tx_queues);
957                 kvfree(priv->tx);
958                 priv->tx = NULL;
959         }
960         if (priv->rx) {
961                 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
962                         ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
963                         gve_remove_napi(priv, ntfy_idx);
964                 }
965                 gve_rx_free_rings(priv);
966                 kvfree(priv->rx);
967                 priv->rx = NULL;
968         }
969 }
970
971 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
972                    struct page **page, dma_addr_t *dma,
973                    enum dma_data_direction dir, gfp_t gfp_flags)
974 {
975         *page = alloc_page(gfp_flags);
976         if (!*page) {
977                 priv->page_alloc_fail++;
978                 return -ENOMEM;
979         }
980         *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
981         if (dma_mapping_error(dev, *dma)) {
982                 priv->dma_mapping_error++;
983                 put_page(*page);
984                 return -ENOMEM;
985         }
986         return 0;
987 }
988
989 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
990                                      int pages)
991 {
992         struct gve_queue_page_list *qpl = &priv->qpls[id];
993         int err;
994         int i;
995
996         if (pages + priv->num_registered_pages > priv->max_registered_pages) {
997                 netif_err(priv, drv, priv->dev,
998                           "Reached max number of registered pages %llu > %llu\n",
999                           pages + priv->num_registered_pages,
1000                           priv->max_registered_pages);
1001                 return -EINVAL;
1002         }
1003
1004         qpl->id = id;
1005         qpl->num_entries = 0;
1006         qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1007         /* caller handles clean up */
1008         if (!qpl->pages)
1009                 return -ENOMEM;
1010         qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1011         /* caller handles clean up */
1012         if (!qpl->page_buses)
1013                 return -ENOMEM;
1014
1015         for (i = 0; i < pages; i++) {
1016                 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1017                                      &qpl->page_buses[i],
1018                                      gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1019                 /* caller handles clean up */
1020                 if (err)
1021                         return -ENOMEM;
1022                 qpl->num_entries++;
1023         }
1024         priv->num_registered_pages += pages;
1025
1026         return 0;
1027 }
1028
1029 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1030                    enum dma_data_direction dir)
1031 {
1032         if (!dma_mapping_error(dev, dma))
1033                 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1034         if (page)
1035                 put_page(page);
1036 }
1037
1038 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
1039 {
1040         struct gve_queue_page_list *qpl = &priv->qpls[id];
1041         int i;
1042
1043         if (!qpl->pages)
1044                 return;
1045         if (!qpl->page_buses)
1046                 goto free_pages;
1047
1048         for (i = 0; i < qpl->num_entries; i++)
1049                 gve_free_page(&priv->pdev->dev, qpl->pages[i],
1050                               qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1051
1052         kvfree(qpl->page_buses);
1053         qpl->page_buses = NULL;
1054 free_pages:
1055         kvfree(qpl->pages);
1056         qpl->pages = NULL;
1057         priv->num_registered_pages -= qpl->num_entries;
1058 }
1059
1060 static int gve_alloc_xdp_qpls(struct gve_priv *priv)
1061 {
1062         int start_id;
1063         int i, j;
1064         int err;
1065
1066         start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1067         for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
1068                 err = gve_alloc_queue_page_list(priv, i,
1069                                                 priv->tx_pages_per_qpl);
1070                 if (err)
1071                         goto free_qpls;
1072         }
1073
1074         return 0;
1075
1076 free_qpls:
1077         for (j = start_id; j <= i; j++)
1078                 gve_free_queue_page_list(priv, j);
1079         return err;
1080 }
1081
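/* Allocate the queue page lists backing every TX and RX queue; queue formats
 * that do not use QPLs return early without allocating anything.
 */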
1082 static int gve_alloc_qpls(struct gve_priv *priv)
1083 {
1084         int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1085         int page_count;
1086         int start_id;
1087         int i, j;
1088         int err;
1089
1090         if (!gve_is_qpl(priv))
1091                 return 0;
1092
1093         priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
1094         if (!priv->qpls)
1095                 return -ENOMEM;
1096
1097         start_id = gve_tx_start_qpl_id(priv);
1098         page_count = priv->tx_pages_per_qpl;
1099         for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
1100                 err = gve_alloc_queue_page_list(priv, i,
1101                                                 page_count);
1102                 if (err)
1103                         goto free_qpls;
1104         }
1105
1106         start_id = gve_rx_start_qpl_id(priv);
1107
1108         /* For GQI_QPL number of pages allocated have 1:1 relationship with
1109          * number of descriptors. For DQO, number of pages required are
1110          * more than descriptors (because of out of order completions).
1111          */
1112         page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
1113                 priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
1114         for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
1115                 err = gve_alloc_queue_page_list(priv, i,
1116                                                 page_count);
1117                 if (err)
1118                         goto free_qpls;
1119         }
1120
1121         priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
1122                                      sizeof(unsigned long) * BITS_PER_BYTE;
1123         priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
1124                                             sizeof(unsigned long), GFP_KERNEL);
1125         if (!priv->qpl_cfg.qpl_id_map) {
1126                 err = -ENOMEM;
1127                 goto free_qpls;
1128         }
1129
1130         return 0;
1131
1132 free_qpls:
1133         for (j = 0; j <= i; j++)
1134                 gve_free_queue_page_list(priv, j);
1135         kvfree(priv->qpls);
1136         priv->qpls = NULL;
1137         return err;
1138 }
1139
1140 static void gve_free_xdp_qpls(struct gve_priv *priv)
1141 {
1142         int start_id;
1143         int i;
1144
1145         start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1146         for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
1147                 gve_free_queue_page_list(priv, i);
1148 }
1149
1150 static void gve_free_qpls(struct gve_priv *priv)
1151 {
1152         int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1153         int i;
1154
1155         if (!priv->qpls)
1156                 return;
1157
1158         kvfree(priv->qpl_cfg.qpl_id_map);
1159         priv->qpl_cfg.qpl_id_map = NULL;
1160
1161         for (i = 0; i < max_queues; i++)
1162                 gve_free_queue_page_list(priv, i);
1163
1164         kvfree(priv->qpls);
1165         priv->qpls = NULL;
1166 }
1167
1168 /* Use this to schedule a reset when the device is capable of continuing
1169  * to handle other requests in its current state. If it is not, do a reset
1170  * in thread instead.
1171  */
1172 void gve_schedule_reset(struct gve_priv *priv)
1173 {
1174         gve_set_do_reset(priv);
1175         queue_work(priv->gve_wq, &priv->service_task);
1176 }
1177
1178 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1179 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1180 static void gve_turndown(struct gve_priv *priv);
1181 static void gve_turnup(struct gve_priv *priv);
1182
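/* Register xdp_rxq info and memory models for each RX queue and pick up any
 * XSK pools already bound to those queues; undone by gve_unreg_xdp_info().
 */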
1183 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1184 {
1185         struct napi_struct *napi;
1186         struct gve_rx_ring *rx;
1187         int err = 0;
1188         int i, j;
1189         u32 tx_qid;
1190
1191         if (!priv->num_xdp_queues)
1192                 return 0;
1193
1194         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1195                 rx = &priv->rx[i];
1196                 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1197
1198                 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1199                                        napi->napi_id);
1200                 if (err)
1201                         goto err;
1202                 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1203                                                  MEM_TYPE_PAGE_SHARED, NULL);
1204                 if (err)
1205                         goto err;
1206                 rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1207                 if (rx->xsk_pool) {
1208                         err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1209                                                napi->napi_id);
1210                         if (err)
1211                                 goto err;
1212                         err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1213                                                          MEM_TYPE_XSK_BUFF_POOL, NULL);
1214                         if (err)
1215                                 goto err;
1216                         xsk_pool_set_rxq_info(rx->xsk_pool,
1217                                               &rx->xsk_rxq);
1218                 }
1219         }
1220
1221         for (i = 0; i < priv->num_xdp_queues; i++) {
1222                 tx_qid = gve_xdp_tx_queue_id(priv, i);
1223                 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1224         }
1225         return 0;
1226
1227 err:
1228         for (j = i; j >= 0; j--) {
1229                 rx = &priv->rx[j];
1230                 if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1231                         xdp_rxq_info_unreg(&rx->xdp_rxq);
1232                 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1233                         xdp_rxq_info_unreg(&rx->xsk_rxq);
1234         }
1235         return err;
1236 }
1237
1238 static void gve_unreg_xdp_info(struct gve_priv *priv)
1239 {
1240         int i, tx_qid;
1241
1242         if (!priv->num_xdp_queues)
1243                 return;
1244
1245         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1246                 struct gve_rx_ring *rx = &priv->rx[i];
1247
1248                 xdp_rxq_info_unreg(&rx->xdp_rxq);
1249                 if (rx->xsk_pool) {
1250                         xdp_rxq_info_unreg(&rx->xsk_rxq);
1251                         rx->xsk_pool = NULL;
1252                 }
1253         }
1254
1255         for (i = 0; i < priv->num_xdp_queues; i++) {
1256                 tx_qid = gve_xdp_tx_queue_id(priv, i);
1257                 priv->tx[tx_qid].xsk_pool = NULL;
1258         }
1259 }
1260
1261 static void gve_drain_page_cache(struct gve_priv *priv)
1262 {
1263         struct page_frag_cache *nc;
1264         int i;
1265
1266         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1267                 nc = &priv->rx[i].page_cache;
1268                 if (nc->va) {
1269                         __page_frag_cache_drain(virt_to_page(nc->va),
1270                                                 nc->pagecnt_bias);
1271                         nc->va = NULL;
1272                 }
1273         }
1274 }
1275
1276 static int gve_open(struct net_device *dev)
1277 {
1278         struct gve_priv *priv = netdev_priv(dev);
1279         int err;
1280
1281         if (priv->xdp_prog)
1282                 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1283         else
1284                 priv->num_xdp_queues = 0;
1285
1286         err = gve_alloc_qpls(priv);
1287         if (err)
1288                 return err;
1289
1290         err = gve_alloc_rings(priv);
1291         if (err)
1292                 goto free_qpls;
1293
1294         err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1295         if (err)
1296                 goto free_rings;
1297         err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1298         if (err)
1299                 goto free_rings;
1300
1301         err = gve_reg_xdp_info(priv, dev);
1302         if (err)
1303                 goto free_rings;
1304
1305         err = gve_register_qpls(priv);
1306         if (err)
1307                 goto reset;
1308
1309         if (!gve_is_gqi(priv)) {
1310                 /* Hard code this for now. This may be tuned in the future for
1311                  * performance.
1312                  */
1313                 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
1314         }
1315         err = gve_create_rings(priv);
1316         if (err)
1317                 goto reset;
1318
1319         gve_set_device_rings_ok(priv);
1320
1321         if (gve_get_report_stats(priv))
1322                 mod_timer(&priv->stats_report_timer,
1323                           round_jiffies(jiffies +
1324                                 msecs_to_jiffies(priv->stats_report_timer_period)));
1325
1326         gve_turnup(priv);
1327         queue_work(priv->gve_wq, &priv->service_task);
1328         priv->interface_up_cnt++;
1329         return 0;
1330
1331 free_rings:
1332         gve_free_rings(priv);
1333 free_qpls:
1334         gve_free_qpls(priv);
1335         return err;
1336
1337 reset:
1338         /* This must have been called from a reset due to the rtnl lock
1339          * so just return at this point.
1340          */
1341         if (gve_get_reset_in_progress(priv))
1342                 return err;
1343         /* Otherwise reset before returning */
1344         gve_reset_and_teardown(priv, true);
1345         /* if this fails there is nothing we can do so just ignore the return */
1346         gve_reset_recovery(priv, false);
1347         /* return the original error */
1348         return err;
1349 }
1350
1351 static int gve_close(struct net_device *dev)
1352 {
1353         struct gve_priv *priv = netdev_priv(dev);
1354         int err;
1355
1356         netif_carrier_off(dev);
1357         if (gve_get_device_rings_ok(priv)) {
1358                 gve_turndown(priv);
1359                 gve_drain_page_cache(priv);
1360                 err = gve_destroy_rings(priv);
1361                 if (err)
1362                         goto err;
1363                 err = gve_unregister_qpls(priv);
1364                 if (err)
1365                         goto err;
1366                 gve_clear_device_rings_ok(priv);
1367         }
1368         del_timer_sync(&priv->stats_report_timer);
1369
1370         gve_unreg_xdp_info(priv);
1371         gve_free_rings(priv);
1372         gve_free_qpls(priv);
1373         priv->interface_down_cnt++;
1374         return 0;
1375
1376 err:
1377         /* This must have been called from a reset due to the rtnl lock
1378          * so just return at this point.
1379          */
1380         if (gve_get_reset_in_progress(priv))
1381                 return err;
1382         /* Otherwise reset before returning */
1383         gve_reset_and_teardown(priv, true);
1384         return gve_reset_recovery(priv, false);
1385 }
1386
1387 static int gve_remove_xdp_queues(struct gve_priv *priv)
1388 {
1389         int err;
1390
1391         err = gve_destroy_xdp_rings(priv);
1392         if (err)
1393                 return err;
1394
1395         err = gve_unregister_xdp_qpls(priv);
1396         if (err)
1397                 return err;
1398
1399         gve_unreg_xdp_info(priv);
1400         gve_free_xdp_rings(priv);
1401         gve_free_xdp_qpls(priv);
1402         priv->num_xdp_queues = 0;
1403         return 0;
1404 }
1405
1406 static int gve_add_xdp_queues(struct gve_priv *priv)
1407 {
1408         int err;
1409
1410         priv->num_xdp_queues = priv->tx_cfg.num_queues;
1411
1412         err = gve_alloc_xdp_qpls(priv);
1413         if (err)
1414                 goto err;
1415
1416         err = gve_alloc_xdp_rings(priv);
1417         if (err)
1418                 goto free_xdp_qpls;
1419
1420         err = gve_reg_xdp_info(priv, priv->dev);
1421         if (err)
1422                 goto free_xdp_rings;
1423
1424         err = gve_register_xdp_qpls(priv);
1425         if (err)
1426                 goto free_xdp_rings;
1427
1428         err = gve_create_xdp_rings(priv);
1429         if (err)
1430                 goto free_xdp_rings;
1431
1432         return 0;
1433
1434 free_xdp_rings:
1435         gve_free_xdp_rings(priv);
1436 free_xdp_qpls:
1437         gve_free_xdp_qpls(priv);
1438 err:
1439         priv->num_xdp_queues = 0;
1440         return err;
1441 }
1442
1443 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1444 {
1445         if (!gve_get_napi_enabled(priv))
1446                 return;
1447
1448         if (link_status == netif_carrier_ok(priv->dev))
1449                 return;
1450
1451         if (link_status) {
1452                 netdev_info(priv->dev, "Device link is up.\n");
1453                 netif_carrier_on(priv->dev);
1454         } else {
1455                 netdev_info(priv->dev, "Device link is down.\n");
1456                 netif_carrier_off(priv->dev);
1457         }
1458 }
1459
1460 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1461                        struct netlink_ext_ack *extack)
1462 {
1463         struct bpf_prog *old_prog;
1464         int err = 0;
1465         u32 status;
1466
1467         old_prog = READ_ONCE(priv->xdp_prog);
1468         if (!netif_carrier_ok(priv->dev)) {
1469                 WRITE_ONCE(priv->xdp_prog, prog);
1470                 if (old_prog)
1471                         bpf_prog_put(old_prog);
1472                 return 0;
1473         }
1474
1475         gve_turndown(priv);
1476         if (!old_prog && prog) {
1477                 // Allocate XDP TX queues if an XDP program is
1478                 // being installed
1479                 err = gve_add_xdp_queues(priv);
1480                 if (err)
1481                         goto out;
1482         } else if (old_prog && !prog) {
1483                 // Remove XDP TX queues if an XDP program is
1484                 // being uninstalled
1485                 err = gve_remove_xdp_queues(priv);
1486                 if (err)
1487                         goto out;
1488         }
1489         WRITE_ONCE(priv->xdp_prog, prog);
1490         if (old_prog)
1491                 bpf_prog_put(old_prog);
1492
1493 out:
1494         gve_turnup(priv);
1495         status = ioread32be(&priv->reg_bar0->device_status);
1496         gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1497         return err;
1498 }
1499
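     /* Attach an XSK buffer pool to RX queue @qid: validate the qid and
      * pool frame size, DMA map the pool, and, if an XDP program is
      * installed, register the XSK rxq info and point the RX queue and
      * its paired XDP TX queue at the pool.
      */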
1500 static int gve_xsk_pool_enable(struct net_device *dev,
1501                                struct xsk_buff_pool *pool,
1502                                u16 qid)
1503 {
1504         struct gve_priv *priv = netdev_priv(dev);
1505         struct napi_struct *napi;
1506         struct gve_rx_ring *rx;
1507         int tx_qid;
1508         int err;
1509
1510         if (qid >= priv->rx_cfg.num_queues) {
1511                 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d\n", qid);
1512                 return -EINVAL;
1513         }
1514         if (xsk_pool_get_rx_frame_size(pool) <
1515              priv->dev->max_mtu + sizeof(struct ethhdr)) {
1516                 dev_err(&priv->pdev->dev, "xsk pool frame_len too small\n");
1517                 return -EINVAL;
1518         }
1519
1520         err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1521                                DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1522         if (err)
1523                 return err;
1524
1525         /* If XDP prog is not installed, return */
1526         if (!priv->xdp_prog)
1527                 return 0;
1528
1529         rx = &priv->rx[qid];
1530         napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1531         err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1532         if (err)
1533                 goto err;
1534
1535         err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1536                                          MEM_TYPE_XSK_BUFF_POOL, NULL);
1537         if (err)
1538                 goto err;
1539
1540         xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1541         rx->xsk_pool = pool;
1542
1543         tx_qid = gve_xdp_tx_queue_id(priv, qid);
1544         priv->tx[tx_qid].xsk_pool = pool;
1545
1546         return 0;
1547 err:
1548         if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1549                 xdp_rxq_info_unreg(&rx->xsk_rxq);
1550
1551         xsk_pool_dma_unmap(pool,
1552                            DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1553         return err;
1554 }
1555
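     /* Detach the XSK buffer pool from RX queue @qid. When the interface
      * is running, the RX and paired TX NAPI instances are briefly
      * disabled so in-flight polls finish before the pool pointers are
      * cleared; the pool's DMA mappings are then released.
      */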
1556 static int gve_xsk_pool_disable(struct net_device *dev,
1557                                 u16 qid)
1558 {
1559         struct gve_priv *priv = netdev_priv(dev);
1560         struct napi_struct *napi_rx;
1561         struct napi_struct *napi_tx;
1562         struct xsk_buff_pool *pool;
1563         int tx_qid;
1564
1565         pool = xsk_get_pool_from_qid(dev, qid);
1566         if (!pool)
1567                 return -EINVAL;
1568         if (qid >= priv->rx_cfg.num_queues)
1569                 return -EINVAL;
1570
1571         /* If XDP prog is not installed, unmap DMA and return */
1572         if (!priv->xdp_prog)
1573                 goto done;
1574
1575         tx_qid = gve_xdp_tx_queue_id(priv, qid);
1576         if (!netif_running(dev)) {
1577                 priv->rx[qid].xsk_pool = NULL;
1578                 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1579                 priv->tx[tx_qid].xsk_pool = NULL;
1580                 goto done;
1581         }
1582
1583         napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1584         napi_disable(napi_rx); /* make sure current rx poll is done */
1585
1586         napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1587         napi_disable(napi_tx); /* make sure current tx poll is done */
1588
1589         priv->rx[qid].xsk_pool = NULL;
1590         xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1591         priv->tx[tx_qid].xsk_pool = NULL;
1592         smp_mb(); /* Make sure it is visible to the workers on datapath */
1593
1594         napi_enable(napi_rx);
1595         if (gve_rx_work_pending(&priv->rx[qid]))
1596                 napi_schedule(napi_rx);
1597
1598         napi_enable(napi_tx);
1599         if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1600                 napi_schedule(napi_tx);
1601
1602 done:
1603         xsk_pool_dma_unmap(pool,
1604                            DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1605         return 0;
1606 }
1607
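     /* XSK wakeup hook: for TX wakeups, schedule the NAPI instance that
      * services the paired XDP TX queue unless it is already scheduled.
      */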
1608 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1609 {
1610         struct gve_priv *priv = netdev_priv(dev);
1611         int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1612
1613         if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1614                 return -EINVAL;
1615
1616         if (flags & XDP_WAKEUP_TX) {
1617                 struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1618                 struct napi_struct *napi =
1619                         &priv->ntfy_blocks[tx->ntfy_id].napi;
1620
1621                 if (!napi_if_scheduled_mark_missed(napi)) {
1622                         /* Call local_bh_enable to trigger SoftIRQ processing */
1623                         local_bh_disable();
1624                         napi_schedule(napi);
1625                         local_bh_enable();
1626                 }
1627
1628                 tx->xdp_xsk_wakeup++;
1629         }
1630
1631         return 0;
1632 }
1633
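     /* XDP is only supported in the GQI-QPL queue format, with LRO off,
      * an MTU whose RX buffer fits in half a page, and RX/TX queue counts
      * that are equal and no more than half the TX queue maximum (the
      * other half is reserved for the XDP TX queues).
      */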
1634 static int verify_xdp_configuration(struct net_device *dev)
1635 {
1636         struct gve_priv *priv = netdev_priv(dev);
1637
1638         if (dev->features & NETIF_F_LRO) {
1639                 netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1640                 return -EOPNOTSUPP;
1641         }
1642
1643         if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1644                 netdev_warn(dev, "XDP is not supported in mode %d.\n",
1645                             priv->queue_format);
1646                 return -EOPNOTSUPP;
1647         }
1648
1649         if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
1650                 netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1651                             dev->mtu);
1652                 return -EOPNOTSUPP;
1653         }
1654
1655         if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1656             (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1657                 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1658                             priv->rx_cfg.num_queues,
1659                             priv->tx_cfg.num_queues,
1660                             priv->tx_cfg.max_queues);
1661                 return -EINVAL;
1662         }
1663         return 0;
1664 }
1665
1666 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1667 {
1668         struct gve_priv *priv = netdev_priv(dev);
1669         int err;
1670
1671         err = verify_xdp_configuration(dev);
1672         if (err)
1673                 return err;
1674         switch (xdp->command) {
1675         case XDP_SETUP_PROG:
1676                 return gve_set_xdp(priv, xdp->prog, xdp->extack);
1677         case XDP_SETUP_XSK_POOL:
1678                 if (xdp->xsk.pool)
1679                         return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1680                 else
1681                         return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1682         default:
1683                 return -EINVAL;
1684         }
1685 }
1686
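     /* Apply a new RX/TX queue configuration. If the interface is up the
      * device is closed, the new config recorded, and the device reopened;
      * otherwise the config simply takes effect on the next open.
      */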
1687 int gve_adjust_queues(struct gve_priv *priv,
1688                       struct gve_queue_config new_rx_config,
1689                       struct gve_queue_config new_tx_config)
1690 {
1691         int err;
1692
1693         if (netif_carrier_ok(priv->dev)) {
1694                 /* To make this process as simple as possible we tear down the
1695                  * device, set the new configuration, and then bring the device
1696                  * up again.
1697                  */
1698                 err = gve_close(priv->dev);
1699                 /* we have already tried to reset in close,
1700                  * just fail at this point
1701                  */
1702                 if (err)
1703                         return err;
1704                 priv->tx_cfg = new_tx_config;
1705                 priv->rx_cfg = new_rx_config;
1706
1707                 err = gve_open(priv->dev);
1708                 if (err)
1709                         goto err;
1710
1711                 return 0;
1712         }
1713         /* Set the config for the next up. */
1714         priv->tx_cfg = new_tx_config;
1715         priv->rx_cfg = new_rx_config;
1716
1717         return 0;
1718 err:
1719         netif_err(priv, drv, priv->dev,
1720                   "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1721         gve_turndown(priv);
1722         return err;
1723 }
1724
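     /* Quiesce the data path: drop the carrier, disable NAPI on every TX
      * and RX notification block, stop the TX queues, and clear the
      * napi-enabled and report-stats flags.
      */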
1725 static void gve_turndown(struct gve_priv *priv)
1726 {
1727         int idx;
1728
1729         if (netif_carrier_ok(priv->dev))
1730                 netif_carrier_off(priv->dev);
1731
1732         if (!gve_get_napi_enabled(priv))
1733                 return;
1734
1735         /* Disable napi to prevent more work from coming in */
1736         for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1737                 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1738                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1739
1740                 napi_disable(&block->napi);
1741         }
1742         for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1743                 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1744                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1745
1746                 napi_disable(&block->napi);
1747         }
1748
1749         /* Stop tx queues */
1750         netif_tx_disable(priv->dev);
1751
1752         gve_clear_napi_enabled(priv);
1753         gve_clear_report_stats(priv);
1754 }
1755
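     /* Restart the data path: start the TX queues, re-enable NAPI for
      * every queue, and unmask interrupts (a doorbell write of 0 on GQI,
      * restoring the ITR coalescing values on DQO).
      */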
1756 static void gve_turnup(struct gve_priv *priv)
1757 {
1758         int idx;
1759
1760         /* Start the tx queues */
1761         netif_tx_start_all_queues(priv->dev);
1762
1763         /* Enable napi and unmask interrupts for all queues */
1764         for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1765                 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1766                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1767
1768                 napi_enable(&block->napi);
1769                 if (gve_is_gqi(priv)) {
1770                         iowrite32be(0, gve_irq_doorbell(priv, block));
1771                 } else {
1772                         gve_set_itr_coalesce_usecs_dqo(priv, block,
1773                                                        priv->tx_coalesce_usecs);
1774                 }
1775         }
1776         for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1777                 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1778                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1779
1780                 napi_enable(&block->napi);
1781                 if (gve_is_gqi(priv)) {
1782                         iowrite32be(0, gve_irq_doorbell(priv, block));
1783                 } else {
1784                         gve_set_itr_coalesce_usecs_dqo(priv, block,
1785                                                        priv->rx_coalesce_usecs);
1786                 }
1787         }
1788
1789         gve_set_napi_enabled(priv);
1790 }
1791
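     /* Watchdog timeout handler. If the stalled queue has completions the
      * driver has not seen (a likely missed interrupt) and it has not been
      * kicked within MIN_TX_TIMEOUT_GAP, mask its IRQ and reschedule NAPI
      * instead of resetting; otherwise schedule a full device reset.
      */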
1792 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1793 {
1794         struct gve_notify_block *block;
1795         struct gve_tx_ring *tx = NULL;
1796         struct gve_priv *priv;
1797         u32 last_nic_done;
1798         u32 current_time;
1799         u32 ntfy_idx;
1800
1801         netdev_info(dev, "Timeout on tx queue %d\n", txqueue);
1802         priv = netdev_priv(dev);
1803         if (txqueue >= priv->tx_cfg.num_queues)
1804                 goto reset;
1805
1806         ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1807         if (ntfy_idx >= priv->num_ntfy_blks)
1808                 goto reset;
1809
1810         block = &priv->ntfy_blocks[ntfy_idx];
1811         tx = block->tx;
1812
1813         current_time = jiffies_to_msecs(jiffies);
1814         if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1815                 goto reset;
1816
1817         /* Check to see if there are missed completions, which will allow us to
1818          * kick the queue.
1819          */
1820         last_nic_done = gve_tx_load_event_counter(priv, tx);
1821         if (last_nic_done - tx->done) {
1822                 netdev_info(dev, "Kicking queue %d\n", txqueue);
1823                 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1824                 napi_schedule(&block->napi);
1825                 tx->last_kick_msec = current_time;
1826                 goto out;
1827         } // Else reset.
1828
1829 reset:
1830         gve_schedule_reset(priv);
1831
1832 out:
1833         if (tx)
1834                 tx->queue_timeout++;
1835         priv->tx_timeo_cnt++;
1836 }
1837
1838 static int gve_set_features(struct net_device *netdev,
1839                             netdev_features_t features)
1840 {
1841         const netdev_features_t orig_features = netdev->features;
1842         struct gve_priv *priv = netdev_priv(netdev);
1843         int err;
1844
1845         if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
1846                 netdev->features ^= NETIF_F_LRO;
1847                 if (netif_carrier_ok(netdev)) {
1848                         /* To make this process as simple as possible we
1849                          * tear down the device, set the new configuration,
1850                          * and then bring the device up again.
1851                          */
1852                         err = gve_close(netdev);
1853                         /* We have already tried to reset in close, just fail
1854                          * at this point.
1855                          */
1856                         if (err)
1857                                 goto err;
1858
1859                         err = gve_open(netdev);
1860                         if (err)
1861                                 goto err;
1862                 }
1863         }
1864
1865         return 0;
1866 err:
1867         /* Reverts the change on error. */
1868         netdev->features = orig_features;
1869         netif_err(priv, drv, netdev,
1870                   "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
1871         return err;
1872 }
1873
1874 static const struct net_device_ops gve_netdev_ops = {
1875         .ndo_start_xmit         =       gve_start_xmit,
1876         .ndo_open               =       gve_open,
1877         .ndo_stop               =       gve_close,
1878         .ndo_get_stats64        =       gve_get_stats,
1879         .ndo_tx_timeout         =       gve_tx_timeout,
1880         .ndo_set_features       =       gve_set_features,
1881         .ndo_bpf                =       gve_xdp,
1882         .ndo_xdp_xmit           =       gve_xdp_xmit,
1883         .ndo_xsk_wakeup         =       gve_xsk_wakeup,
1884 };
1885
1886 static void gve_handle_status(struct gve_priv *priv, u32 status)
1887 {
1888         if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1889                 dev_info(&priv->pdev->dev, "Device requested reset.\n");
1890                 gve_set_do_reset(priv);
1891         }
1892         if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1893                 priv->stats_report_trigger_cnt++;
1894                 gve_set_do_report_stats(priv);
1895         }
1896 }
1897
1898 static void gve_handle_reset(struct gve_priv *priv)
1899 {
1900         /* A service task will be scheduled at the end of probe to catch any
1901          * resets that need to happen, and we don't want to reset until
1902          * probe is done.
1903          */
1904         if (gve_get_probe_in_progress(priv))
1905                 return;
1906
1907         if (gve_get_do_reset(priv)) {
1908                 rtnl_lock();
1909                 gve_reset(priv, false);
1910                 rtnl_unlock();
1911         }
1912 }
1913
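     /* Fill the DMA'd stats report with per-queue TX counters (wake/stop
      * counts, frames and bytes sent, last completion, timeouts) and RX
      * counters (expected seqno, buffers posted), and bump the report's
      * written_count.
      */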
1914 void gve_handle_report_stats(struct gve_priv *priv)
1915 {
1916         struct stats *stats = priv->stats_report->stats;
1917         int idx, stats_idx = 0;
1918         unsigned int start = 0;
1919         u64 tx_bytes;
1920
1921         if (!gve_get_report_stats(priv))
1922                 return;
1923
1924         be64_add_cpu(&priv->stats_report->written_count, 1);
1925         /* tx stats */
1926         if (priv->tx) {
1927                 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1928                         u32 last_completion = 0;
1929                         u32 tx_frames = 0;
1930
1931                         /* DQO doesn't currently support these metrics. */
1932                         if (gve_is_gqi(priv)) {
1933                                 last_completion = priv->tx[idx].done;
1934                                 tx_frames = priv->tx[idx].req;
1935                         }
1936
1937                         do {
1938                                 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1939                                 tx_bytes = priv->tx[idx].bytes_done;
1940                         } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1941                         stats[stats_idx++] = (struct stats) {
1942                                 .stat_name = cpu_to_be32(TX_WAKE_CNT),
1943                                 .value = cpu_to_be64(priv->tx[idx].wake_queue),
1944                                 .queue_id = cpu_to_be32(idx),
1945                         };
1946                         stats[stats_idx++] = (struct stats) {
1947                                 .stat_name = cpu_to_be32(TX_STOP_CNT),
1948                                 .value = cpu_to_be64(priv->tx[idx].stop_queue),
1949                                 .queue_id = cpu_to_be32(idx),
1950                         };
1951                         stats[stats_idx++] = (struct stats) {
1952                                 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
1953                                 .value = cpu_to_be64(tx_frames),
1954                                 .queue_id = cpu_to_be32(idx),
1955                         };
1956                         stats[stats_idx++] = (struct stats) {
1957                                 .stat_name = cpu_to_be32(TX_BYTES_SENT),
1958                                 .value = cpu_to_be64(tx_bytes),
1959                                 .queue_id = cpu_to_be32(idx),
1960                         };
1961                         stats[stats_idx++] = (struct stats) {
1962                                 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1963                                 .value = cpu_to_be64(last_completion),
1964                                 .queue_id = cpu_to_be32(idx),
1965                         };
1966                         stats[stats_idx++] = (struct stats) {
1967                                 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1968                                 .value = cpu_to_be64(priv->tx[idx].queue_timeout),
1969                                 .queue_id = cpu_to_be32(idx),
1970                         };
1971                 }
1972         }
1973         /* rx stats */
1974         if (priv->rx) {
1975                 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1976                         stats[stats_idx++] = (struct stats) {
1977                                 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1978                                 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
1979                                 .queue_id = cpu_to_be32(idx),
1980                         };
1981                         stats[stats_idx++] = (struct stats) {
1982                                 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
1983                                 .value = cpu_to_be64(priv->rx[idx].fill_cnt),
1984                                 .queue_id = cpu_to_be32(idx),
1985                         };
1986                 }
1987         }
1988 }
1989
1990 /* Handle NIC status register changes, reset requests and report stats */
1991 static void gve_service_task(struct work_struct *work)
1992 {
1993         struct gve_priv *priv = container_of(work, struct gve_priv,
1994                                              service_task);
1995         u32 status = ioread32be(&priv->reg_bar0->device_status);
1996
1997         gve_handle_status(priv, status);
1998
1999         gve_handle_reset(priv);
2000         gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
2001 }
2002
2003 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
2004 {
2005         if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
2006                 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
2007                 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
2008                 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2009                 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2010         } else {
2011                 priv->dev->xdp_features = 0;
2012         }
2013 }
2014
2015 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2016 {
2017         int num_ntfy;
2018         int err;
2019
2020         /* Set up the adminq */
2021         err = gve_adminq_alloc(&priv->pdev->dev, priv);
2022         if (err) {
2023                 dev_err(&priv->pdev->dev,
2024                         "Failed to alloc admin queue: err=%d\n", err);
2025                 return err;
2026         }
2027
2028         err = gve_verify_driver_compatibility(priv);
2029         if (err) {
2030                 dev_err(&priv->pdev->dev,
2031                         "Could not verify driver compatibility: err=%d\n", err);
2032                 goto err;
2033         }
2034
2035         if (skip_describe_device)
2036                 goto setup_device;
2037
2038         priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2039         /* Get the initial information we need from the device */
2040         err = gve_adminq_describe_device(priv);
2041         if (err) {
2042                 dev_err(&priv->pdev->dev,
2043                         "Could not get device information: err=%d\n", err);
2044                 goto err;
2045         }
2046         priv->dev->mtu = priv->dev->max_mtu;
2047         num_ntfy = pci_msix_vec_count(priv->pdev);
2048         if (num_ntfy <= 0) {
2049                 dev_err(&priv->pdev->dev,
2050                         "could not count MSI-x vectors: err=%d\n", num_ntfy);
2051                 err = num_ntfy;
2052                 goto err;
2053         } else if (num_ntfy < GVE_MIN_MSIX) {
2054                 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2055                         GVE_MIN_MSIX, num_ntfy);
2056                 err = -EINVAL;
2057                 goto err;
2058         }
2059
2060         /* Big TCP is only supported on DQ */
2061         if (!gve_is_gqi(priv))
2062                 netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
2063
2064         priv->num_registered_pages = 0;
2065         priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2066         /* gvnic has one Notification Block per MSI-x vector, except for the
2067          * management vector
2068          */
2069         priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2070         priv->mgmt_msix_idx = priv->num_ntfy_blks;
2071
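             /* Half of the notification blocks go to TX and half to RX.
              * For example, with 17 MSI-X vectors one is reserved for
              * management and the remaining 16 (rounded down to an even
              * count) become notification blocks, capping TX and RX at
              * 8 queues each.
              */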
2072         priv->tx_cfg.max_queues =
2073                 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2074         priv->rx_cfg.max_queues =
2075                 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2076
2077         priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2078         priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2079         if (priv->default_num_queues > 0) {
2080                 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2081                                                 priv->tx_cfg.num_queues);
2082                 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2083                                                 priv->rx_cfg.num_queues);
2084         }
2085
2086         dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2087                  priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2088         dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2089                  priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2090
2091         if (!gve_is_gqi(priv)) {
2092                 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2093                 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2094         }
2095
2096 setup_device:
2097         gve_set_netdev_xdp_features(priv);
2098         err = gve_setup_device_resources(priv);
2099         if (!err)
2100                 return 0;
2101 err:
2102         gve_adminq_free(&priv->pdev->dev, priv);
2103         return err;
2104 }
2105
2106 static void gve_teardown_priv_resources(struct gve_priv *priv)
2107 {
2108         gve_teardown_device_resources(priv);
2109         gve_adminq_free(&priv->pdev->dev, priv);
2110 }
2111
2112 static void gve_trigger_reset(struct gve_priv *priv)
2113 {
2114         /* Reset the device by releasing the AQ */
2115         gve_adminq_release(priv);
2116 }
2117
2118 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2119 {
2120         gve_trigger_reset(priv);
2121         /* With the reset having already happened, close cannot fail */
2122         if (was_up)
2123                 gve_close(priv->dev);
2124         gve_teardown_priv_resources(priv);
2125 }
2126
2127 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2128 {
2129         int err;
2130
2131         err = gve_init_priv(priv, true);
2132         if (err)
2133                 goto err;
2134         if (was_up) {
2135                 err = gve_open(priv->dev);
2136                 if (err)
2137                         goto err;
2138         }
2139         return 0;
2140 err:
2141         dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2142         gve_turndown(priv);
2143         return err;
2144 }
2145
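     /* Full device reset: tear the device down (attempting a normal close
      * first unless told not to), then re-initialize priv and reopen the
      * interface if it was up, clearing the reset-in-progress flag and the
      * interface counters.
      */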
2146 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2147 {
2148         bool was_up = netif_carrier_ok(priv->dev);
2149         int err;
2150
2151         dev_info(&priv->pdev->dev, "Performing reset\n");
2152         gve_clear_do_reset(priv);
2153         gve_set_reset_in_progress(priv);
2154         /* If we aren't attempting to teardown normally, just go turndown and
2155          * reset right away.
2156          */
2157         if (!attempt_teardown) {
2158                 gve_turndown(priv);
2159                 gve_reset_and_teardown(priv, was_up);
2160         } else {
2161                 /* Otherwise attempt to close normally */
2162                 if (was_up) {
2163                         err = gve_close(priv->dev);
2164                         /* If that fails reset as we did above */
2165                         if (err)
2166                                 gve_reset_and_teardown(priv, was_up);
2167                 }
2168                 /* Clean up any remaining resources */
2169                 gve_teardown_priv_resources(priv);
2170         }
2171
2172         /* Set it all back up */
2173         err = gve_reset_recovery(priv, was_up);
2174         gve_clear_reset_in_progress(priv);
2175         priv->reset_cnt++;
2176         priv->interface_up_cnt = 0;
2177         priv->interface_down_cnt = 0;
2178         priv->stats_report_trigger_cnt = 0;
2179         return err;
2180 }
2181
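     /* Stream the driver version ("GVE-" prefix followed by the version
      * string and a newline) to the device one byte at a time through the
      * driver-version register.
      */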
2182 static void gve_write_version(u8 __iomem *driver_version_register)
2183 {
2184         const char *c = gve_version_prefix;
2185
2186         while (*c) {
2187                 writeb(*c, driver_version_register);
2188                 c++;
2189         }
2190
2191         c = gve_version_str;
2192         while (*c) {
2193                 writeb(*c, driver_version_register);
2194                 c++;
2195         }
2196         writeb('\n', driver_version_register);
2197 }
2198
2199 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2200 {
2201         int max_tx_queues, max_rx_queues;
2202         struct net_device *dev;
2203         __be32 __iomem *db_bar;
2204         struct gve_registers __iomem *reg_bar;
2205         struct gve_priv *priv;
2206         int err;
2207
2208         err = pci_enable_device(pdev);
2209         if (err)
2210                 return err;
2211
2212         err = pci_request_regions(pdev, gve_driver_name);
2213         if (err)
2214                 goto abort_with_enabled;
2215
2216         pci_set_master(pdev);
2217
2218         err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2219         if (err) {
2220                 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2221                 goto abort_with_pci_region;
2222         }
2223
2224         reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2225         if (!reg_bar) {
2226                 dev_err(&pdev->dev, "Failed to map pci bar!\n");
2227                 err = -ENOMEM;
2228                 goto abort_with_pci_region;
2229         }
2230
2231         db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2232         if (!db_bar) {
2233                 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2234                 err = -ENOMEM;
2235                 goto abort_with_reg_bar;
2236         }
2237
2238         gve_write_version(&reg_bar->driver_version);
2239         /* Get max queues to alloc etherdev */
2240         max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2241         max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2242         /* Alloc and setup the netdev and priv */
2243         dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2244         if (!dev) {
2245                 dev_err(&pdev->dev, "could not allocate netdev\n");
2246                 err = -ENOMEM;
2247                 goto abort_with_db_bar;
2248         }
2249         SET_NETDEV_DEV(dev, &pdev->dev);
2250         pci_set_drvdata(pdev, dev);
2251         dev->ethtool_ops = &gve_ethtool_ops;
2252         dev->netdev_ops = &gve_netdev_ops;
2253
2254         /* Set default and supported features.
2255          *
2256          * Features might be set in other locations as well (such as
2257          * `gve_adminq_describe_device`).
2258          */
2259         dev->hw_features = NETIF_F_HIGHDMA;
2260         dev->hw_features |= NETIF_F_SG;
2261         dev->hw_features |= NETIF_F_HW_CSUM;
2262         dev->hw_features |= NETIF_F_TSO;
2263         dev->hw_features |= NETIF_F_TSO6;
2264         dev->hw_features |= NETIF_F_TSO_ECN;
2265         dev->hw_features |= NETIF_F_RXCSUM;
2266         dev->hw_features |= NETIF_F_RXHASH;
2267         dev->features = dev->hw_features;
2268         dev->watchdog_timeo = 5 * HZ;
2269         dev->min_mtu = ETH_MIN_MTU;
2270         netif_carrier_off(dev);
2271
2272         priv = netdev_priv(dev);
2273         priv->dev = dev;
2274         priv->pdev = pdev;
2275         priv->msg_enable = DEFAULT_MSG_LEVEL;
2276         priv->reg_bar0 = reg_bar;
2277         priv->db_bar2 = db_bar;
2278         priv->service_task_flags = 0x0;
2279         priv->state_flags = 0x0;
2280         priv->ethtool_flags = 0x0;
2281
2282         gve_set_probe_in_progress(priv);
2283         priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2284         if (!priv->gve_wq) {
2285                 dev_err(&pdev->dev, "Could not allocate workqueue\n");
2286                 err = -ENOMEM;
2287                 goto abort_with_netdev;
2288         }
2289         INIT_WORK(&priv->service_task, gve_service_task);
2290         INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2291         priv->tx_cfg.max_queues = max_tx_queues;
2292         priv->rx_cfg.max_queues = max_rx_queues;
2293
2294         err = gve_init_priv(priv, false);
2295         if (err)
2296                 goto abort_with_wq;
2297
2298         err = register_netdev(dev);
2299         if (err)
2300                 goto abort_with_gve_init;
2301
2302         dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2303         dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2304         gve_clear_probe_in_progress(priv);
2305         queue_work(priv->gve_wq, &priv->service_task);
2306         return 0;
2307
2308 abort_with_gve_init:
2309         gve_teardown_priv_resources(priv);
2310
2311 abort_with_wq:
2312         destroy_workqueue(priv->gve_wq);
2313
2314 abort_with_netdev:
2315         free_netdev(dev);
2316
2317 abort_with_db_bar:
2318         pci_iounmap(pdev, db_bar);
2319
2320 abort_with_reg_bar:
2321         pci_iounmap(pdev, reg_bar);
2322
2323 abort_with_pci_region:
2324         pci_release_regions(pdev);
2325
2326 abort_with_enabled:
2327         pci_disable_device(pdev);
2328         return err;
2329 }
2330
2331 static void gve_remove(struct pci_dev *pdev)
2332 {
2333         struct net_device *netdev = pci_get_drvdata(pdev);
2334         struct gve_priv *priv = netdev_priv(netdev);
2335         __be32 __iomem *db_bar = priv->db_bar2;
2336         void __iomem *reg_bar = priv->reg_bar0;
2337
2338         unregister_netdev(netdev);
2339         gve_teardown_priv_resources(priv);
2340         destroy_workqueue(priv->gve_wq);
2341         free_netdev(netdev);
2342         pci_iounmap(pdev, db_bar);
2343         pci_iounmap(pdev, reg_bar);
2344         pci_release_regions(pdev);
2345         pci_disable_device(pdev);
2346 }
2347
2348 static void gve_shutdown(struct pci_dev *pdev)
2349 {
2350         struct net_device *netdev = pci_get_drvdata(pdev);
2351         struct gve_priv *priv = netdev_priv(netdev);
2352         bool was_up = netif_carrier_ok(priv->dev);
2353
2354         rtnl_lock();
2355         if (was_up && gve_close(priv->dev)) {
2356                 /* If the dev was up, attempt to close, if close fails, reset */
2357                 gve_reset_and_teardown(priv, was_up);
2358         } else {
2359                 /* If the dev wasn't up or close worked, finish tearing down */
2360                 gve_teardown_priv_resources(priv);
2361         }
2362         rtnl_unlock();
2363 }
2364
2365 #ifdef CONFIG_PM
2366 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2367 {
2368         struct net_device *netdev = pci_get_drvdata(pdev);
2369         struct gve_priv *priv = netdev_priv(netdev);
2370         bool was_up = netif_carrier_ok(priv->dev);
2371
2372         priv->suspend_cnt++;
2373         rtnl_lock();
2374         if (was_up && gve_close(priv->dev)) {
2375                 /* If the dev was up, attempt to close, if close fails, reset */
2376                 gve_reset_and_teardown(priv, was_up);
2377         } else {
2378                 /* If the dev wasn't up or close worked, finish tearing down */
2379                 gve_teardown_priv_resources(priv);
2380         }
2381         priv->up_before_suspend = was_up;
2382         rtnl_unlock();
2383         return 0;
2384 }
2385
2386 static int gve_resume(struct pci_dev *pdev)
2387 {
2388         struct net_device *netdev = pci_get_drvdata(pdev);
2389         struct gve_priv *priv = netdev_priv(netdev);
2390         int err;
2391
2392         priv->resume_cnt++;
2393         rtnl_lock();
2394         err = gve_reset_recovery(priv, priv->up_before_suspend);
2395         rtnl_unlock();
2396         return err;
2397 }
2398 #endif /* CONFIG_PM */
2399
2400 static const struct pci_device_id gve_id_table[] = {
2401         { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2402         { }
2403 };
2404
2405 static struct pci_driver gve_driver = {
2406         .name           = gve_driver_name,
2407         .id_table       = gve_id_table,
2408         .probe          = gve_probe,
2409         .remove         = gve_remove,
2410         .shutdown       = gve_shutdown,
2411 #ifdef CONFIG_PM
2412         .suspend        = gve_suspend,
2413         .resume         = gve_resume,
2414 #endif
2415 };
2416
2417 module_pci_driver(gve_driver);
2418
2419 MODULE_DEVICE_TABLE(pci, gve_id_table);
2420 MODULE_AUTHOR("Google, Inc.");
2421 MODULE_DESCRIPTION("Google Virtual NIC Driver");
2422 MODULE_LICENSE("Dual MIT/GPL");
2423 MODULE_VERSION(GVE_VERSION);