platform/kernel/linux-starfive.git: drivers/net/ethernet/google/gve/gve_main.c
1 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
2 /* Google virtual Ethernet (gve) driver
3  *
4  * Copyright (C) 2015-2021 Google, Inc.
5  */
6
7 #include <linux/bpf.h>
8 #include <linux/cpumask.h>
9 #include <linux/etherdevice.h>
10 #include <linux/filter.h>
11 #include <linux/interrupt.h>
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/sched.h>
15 #include <linux/timer.h>
16 #include <linux/workqueue.h>
17 #include <linux/utsname.h>
18 #include <linux/version.h>
19 #include <net/sch_generic.h>
20 #include <net/xdp_sock_drv.h>
21 #include "gve.h"
22 #include "gve_dqo.h"
23 #include "gve_adminq.h"
24 #include "gve_register.h"
25
26 #define GVE_DEFAULT_RX_COPYBREAK        (256)
27
28 #define DEFAULT_MSG_LEVEL       (NETIF_MSG_DRV | NETIF_MSG_LINK)
29 #define GVE_VERSION             "1.0.0"
30 #define GVE_VERSION_PREFIX      "GVE-"
31
32 // Minimum amount of time between queue kicks in msec (10 seconds)
33 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
34 #define DQO_TX_MAX      0x3FFFF
35
36 const char gve_version_str[] = GVE_VERSION;
37 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
38
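/* Allocate a DMA-coherent gve_driver_info block, fill in the OS release,
 * kernel version numbers and driver capability flags, and hand it to the
 * device over the admin queue. Devices that do not implement the command
 * return -EOPNOTSUPP, which is treated as success.
 */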
39 static int gve_verify_driver_compatibility(struct gve_priv *priv)
40 {
41         int err;
42         struct gve_driver_info *driver_info;
43         dma_addr_t driver_info_bus;
44
45         driver_info = dma_alloc_coherent(&priv->pdev->dev,
46                                          sizeof(struct gve_driver_info),
47                                          &driver_info_bus, GFP_KERNEL);
48         if (!driver_info)
49                 return -ENOMEM;
50
51         *driver_info = (struct gve_driver_info) {
52                 .os_type = 1, /* Linux */
53                 .os_version_major = cpu_to_be32(LINUX_VERSION_MAJOR),
54                 .os_version_minor = cpu_to_be32(LINUX_VERSION_SUBLEVEL),
55                 .os_version_sub = cpu_to_be32(LINUX_VERSION_PATCHLEVEL),
56                 .driver_capability_flags = {
57                         cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS1),
58                         cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS2),
59                         cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS3),
60                         cpu_to_be64(GVE_DRIVER_CAPABILITY_FLAGS4),
61                 },
62         };
63         strscpy(driver_info->os_version_str1, utsname()->release,
64                 sizeof(driver_info->os_version_str1));
65         strscpy(driver_info->os_version_str2, utsname()->version,
66                 sizeof(driver_info->os_version_str2));
67
68         err = gve_adminq_verify_driver_compatibility(priv,
69                                                      sizeof(struct gve_driver_info),
70                                                      driver_info_bus);
71
72         /* It's ok if the device doesn't support this */
73         if (err == -EOPNOTSUPP)
74                 err = 0;
75
76         dma_free_coherent(&priv->pdev->dev,
77                           sizeof(struct gve_driver_info),
78                           driver_info, driver_info_bus);
79         return err;
80 }
81
82 static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
83 {
84         struct gve_priv *priv = netdev_priv(dev);
85
86         if (gve_is_gqi(priv))
87                 return gve_tx(skb, dev);
88         else
89                 return gve_tx_dqo(skb, dev);
90 }
91
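/* Aggregate per-ring packet and byte counters into rtnl_link_stats64. Each
 * ring's counters are read under its u64_stats seqcount and retried if a
 * writer updated them mid-read, so the 64-bit values stay consistent even
 * on 32-bit hosts.
 */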
92 static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
93 {
94         struct gve_priv *priv = netdev_priv(dev);
95         unsigned int start;
96         u64 packets, bytes;
97         int num_tx_queues;
98         int ring;
99
100         num_tx_queues = gve_num_tx_queues(priv);
101         if (priv->rx) {
102                 for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
103                         do {
104                                 start =
105                                   u64_stats_fetch_begin(&priv->rx[ring].statss);
106                                 packets = priv->rx[ring].rpackets;
107                                 bytes = priv->rx[ring].rbytes;
108                         } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
109                                                        start));
110                         s->rx_packets += packets;
111                         s->rx_bytes += bytes;
112                 }
113         }
114         if (priv->tx) {
115                 for (ring = 0; ring < num_tx_queues; ring++) {
116                         do {
117                                 start =
118                                   u64_stats_fetch_begin(&priv->tx[ring].statss);
119                                 packets = priv->tx[ring].pkt_done;
120                                 bytes = priv->tx[ring].bytes_done;
121                         } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
122                                                        start));
123                         s->tx_packets += packets;
124                         s->tx_bytes += bytes;
125                 }
126         }
127 }
128
129 static int gve_alloc_counter_array(struct gve_priv *priv)
130 {
131         priv->counter_array =
132                 dma_alloc_coherent(&priv->pdev->dev,
133                                    priv->num_event_counters *
134                                    sizeof(*priv->counter_array),
135                                    &priv->counter_array_bus, GFP_KERNEL);
136         if (!priv->counter_array)
137                 return -ENOMEM;
138
139         return 0;
140 }
141
142 static void gve_free_counter_array(struct gve_priv *priv)
143 {
144         if (!priv->counter_array)
145                 return;
146
147         dma_free_coherent(&priv->pdev->dev,
148                           priv->num_event_counters *
149                           sizeof(*priv->counter_array),
150                           priv->counter_array, priv->counter_array_bus);
151         priv->counter_array = NULL;
152 }
153
154 /* NIC requests to report stats */
155 static void gve_stats_report_task(struct work_struct *work)
156 {
157         struct gve_priv *priv = container_of(work, struct gve_priv,
158                                              stats_report_task);
159         if (gve_get_do_report_stats(priv)) {
160                 gve_handle_report_stats(priv);
161                 gve_clear_do_report_stats(priv);
162         }
163 }
164
165 static void gve_stats_report_schedule(struct gve_priv *priv)
166 {
167         if (!gve_get_probe_in_progress(priv) &&
168             !gve_get_reset_in_progress(priv)) {
169                 gve_set_do_report_stats(priv);
170                 queue_work(priv->gve_wq, &priv->stats_report_task);
171         }
172 }
173
174 static void gve_stats_report_timer(struct timer_list *t)
175 {
176         struct gve_priv *priv = from_timer(priv, t, stats_report_timer);
177
178         mod_timer(&priv->stats_report_timer,
179                   round_jiffies(jiffies +
180                   msecs_to_jiffies(priv->stats_report_timer_period)));
181         gve_stats_report_schedule(priv);
182 }
183
184 static int gve_alloc_stats_report(struct gve_priv *priv)
185 {
186         int tx_stats_num, rx_stats_num;
187
188         tx_stats_num = (GVE_TX_STATS_REPORT_NUM + NIC_TX_STATS_REPORT_NUM) *
189                        gve_num_tx_queues(priv);
190         rx_stats_num = (GVE_RX_STATS_REPORT_NUM + NIC_RX_STATS_REPORT_NUM) *
191                        priv->rx_cfg.num_queues;
192         priv->stats_report_len = struct_size(priv->stats_report, stats,
193                                              tx_stats_num + rx_stats_num);
194         priv->stats_report =
195                 dma_alloc_coherent(&priv->pdev->dev, priv->stats_report_len,
196                                    &priv->stats_report_bus, GFP_KERNEL);
197         if (!priv->stats_report)
198                 return -ENOMEM;
199         /* Set up timer for the report-stats task */
200         timer_setup(&priv->stats_report_timer, gve_stats_report_timer, 0);
201         priv->stats_report_timer_period = GVE_STATS_REPORT_TIMER_PERIOD;
202         return 0;
203 }
204
205 static void gve_free_stats_report(struct gve_priv *priv)
206 {
207         if (!priv->stats_report)
208                 return;
209
210         del_timer_sync(&priv->stats_report_timer);
211         dma_free_coherent(&priv->pdev->dev, priv->stats_report_len,
212                           priv->stats_report, priv->stats_report_bus);
213         priv->stats_report = NULL;
214 }
215
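/* Management interrupt handler: no work is done in hard-IRQ context, the
 * service task is simply queued on the driver workqueue.
 */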
216 static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
217 {
218         struct gve_priv *priv = arg;
219
220         queue_work(priv->gve_wq, &priv->service_task);
221         return IRQ_HANDLED;
222 }
223
224 static irqreturn_t gve_intr(int irq, void *arg)
225 {
226         struct gve_notify_block *block = arg;
227         struct gve_priv *priv = block->priv;
228
229         iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
230         napi_schedule_irqoff(&block->napi);
231         return IRQ_HANDLED;
232 }
233
234 static irqreturn_t gve_intr_dqo(int irq, void *arg)
235 {
236         struct gve_notify_block *block = arg;
237
238         /* Interrupts are automatically masked */
239         napi_schedule_irqoff(&block->napi);
240         return IRQ_HANDLED;
241 }
242
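/* GQI NAPI poll. TX (or XDP TX) completions and RX traffic for this notify
 * block are serviced together. Returning the full budget keeps the poll
 * scheduled; otherwise NAPI is completed, the IRQ is re-armed through the
 * doorbell, and any work that raced with the unmask is caught by the
 * pending-work re-check below.
 */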
243 static int gve_napi_poll(struct napi_struct *napi, int budget)
244 {
245         struct gve_notify_block *block;
246         __be32 __iomem *irq_doorbell;
247         bool reschedule = false;
248         struct gve_priv *priv;
249         int work_done = 0;
250
251         block = container_of(napi, struct gve_notify_block, napi);
252         priv = block->priv;
253
254         if (block->tx) {
255                 if (block->tx->q_num < priv->tx_cfg.num_queues)
256                         reschedule |= gve_tx_poll(block, budget);
257                 else
258                         reschedule |= gve_xdp_poll(block, budget);
259         }
260
261         if (block->rx) {
262                 work_done = gve_rx_poll(block, budget);
263                 reschedule |= work_done == budget;
264         }
265
266         if (reschedule)
267                 return budget;
268
269         /* Complete processing - don't unmask irq if busy polling is enabled */
270         if (likely(napi_complete_done(napi, work_done))) {
271                 irq_doorbell = gve_irq_doorbell(priv, block);
272                 iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
273
274                 /* Ensure the IRQ ACK is visible before we check for pending
275                  * work, so any updates the queue has already posted are seen.
276                  */
277                 mb();
278
279                 if (block->tx)
280                         reschedule |= gve_tx_clean_pending(priv, block->tx);
281                 if (block->rx)
282                         reschedule |= gve_rx_work_pending(block->rx);
283
284                 if (reschedule && napi_reschedule(napi))
285                         iowrite32be(GVE_IRQ_MASK, irq_doorbell);
286         }
287         return work_done;
288 }
289
290 static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
291 {
292         struct gve_notify_block *block =
293                 container_of(napi, struct gve_notify_block, napi);
294         struct gve_priv *priv = block->priv;
295         bool reschedule = false;
296         int work_done = 0;
297
298         if (block->tx)
299                 reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
300
301         if (block->rx) {
302                 work_done = gve_rx_poll_dqo(block, budget);
303                 reschedule |= work_done == budget;
304         }
305
306         if (reschedule)
307                 return budget;
308
309         if (likely(napi_complete_done(napi, work_done))) {
310                 /* Enable interrupts again.
311                  *
312                  * We don't need to repoll afterwards because HW supports the
313                  * PCI MSI-X PBA feature.
314                  *
315                  * Another interrupt would be triggered if a new event came in
316                  * since the last one.
317                  */
318                 gve_write_irq_doorbell_dqo(priv, block,
319                                            GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
320         }
321
322         return work_done;
323 }
324
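/* Allocate MSI-X vectors and notify blocks. One vector is requested per
 * notify block plus one management vector (the last one). If fewer vectors
 * are granted, the notify blocks are split evenly between TX and RX and the
 * queue maxima are shrunk to match. Each data vector gets an affinity hint
 * spread across the online CPUs.
 */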
325 static int gve_alloc_notify_blocks(struct gve_priv *priv)
326 {
327         int num_vecs_requested = priv->num_ntfy_blks + 1;
328         unsigned int active_cpus;
329         int vecs_enabled;
330         int i, j;
331         int err;
332
333         priv->msix_vectors = kvcalloc(num_vecs_requested,
334                                       sizeof(*priv->msix_vectors), GFP_KERNEL);
335         if (!priv->msix_vectors)
336                 return -ENOMEM;
337         for (i = 0; i < num_vecs_requested; i++)
338                 priv->msix_vectors[i].entry = i;
339         vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors,
340                                              GVE_MIN_MSIX, num_vecs_requested);
341         if (vecs_enabled < 0) {
342                 dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n",
343                         GVE_MIN_MSIX, vecs_enabled);
344                 err = vecs_enabled;
345                 goto abort_with_msix_vectors;
346         }
347         if (vecs_enabled != num_vecs_requested) {
348                 int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1;
349                 int vecs_per_type = new_num_ntfy_blks / 2;
350                 int vecs_left = new_num_ntfy_blks % 2;
351
352                 priv->num_ntfy_blks = new_num_ntfy_blks;
353                 priv->mgmt_msix_idx = priv->num_ntfy_blks;
354                 priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
355                                                 vecs_per_type);
356                 priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
357                                                 vecs_per_type + vecs_left);
358                 dev_err(&priv->pdev->dev,
359                         "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n",
360                         vecs_enabled, priv->tx_cfg.max_queues,
361                         priv->rx_cfg.max_queues);
362                 if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)
363                         priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
364                 if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues)
365                         priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
366         }
367         /* Half the notification blocks go to TX and half to RX */
368         active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus());
369
370         /* Setup Management Vector - the last vector */
371         snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "gve-mgmnt@pci:%s",
372                  pci_name(priv->pdev));
373         err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector,
374                           gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv);
375         if (err) {
376                 dev_err(&priv->pdev->dev, "Did not receive management vector.\n");
377                 goto abort_with_msix_enabled;
378         }
379         priv->irq_db_indices =
380                 dma_alloc_coherent(&priv->pdev->dev,
381                                    priv->num_ntfy_blks *
382                                    sizeof(*priv->irq_db_indices),
383                                    &priv->irq_db_indices_bus, GFP_KERNEL);
384         if (!priv->irq_db_indices) {
385                 err = -ENOMEM;
386                 goto abort_with_mgmt_vector;
387         }
388
389         priv->ntfy_blocks = kvzalloc(priv->num_ntfy_blks *
390                                      sizeof(*priv->ntfy_blocks), GFP_KERNEL);
391         if (!priv->ntfy_blocks) {
392                 err = -ENOMEM;
393                 goto abort_with_irq_db_indices;
394         }
395
396         /* Setup the other blocks - the first n-1 vectors */
397         for (i = 0; i < priv->num_ntfy_blks; i++) {
398                 struct gve_notify_block *block = &priv->ntfy_blocks[i];
399                 int msix_idx = i;
400
401                 snprintf(block->name, sizeof(block->name), "gve-ntfy-blk%d@pci:%s",
402                          i, pci_name(priv->pdev));
403                 block->priv = priv;
404                 err = request_irq(priv->msix_vectors[msix_idx].vector,
405                                   gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
406                                   0, block->name, block);
407                 if (err) {
408                         dev_err(&priv->pdev->dev,
409                                 "Failed to receive msix vector %d\n", i);
410                         goto abort_with_some_ntfy_blocks;
411                 }
412                 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
413                                       get_cpu_mask(i % active_cpus));
414                 block->irq_db_index = &priv->irq_db_indices[i].index;
415         }
416         return 0;
417 abort_with_some_ntfy_blocks:
418         for (j = 0; j < i; j++) {
419                 struct gve_notify_block *block = &priv->ntfy_blocks[j];
420                 int msix_idx = j;
421
422                 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
423                                       NULL);
424                 free_irq(priv->msix_vectors[msix_idx].vector, block);
425         }
426         kvfree(priv->ntfy_blocks);
427         priv->ntfy_blocks = NULL;
428 abort_with_irq_db_indices:
429         dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
430                           sizeof(*priv->irq_db_indices),
431                           priv->irq_db_indices, priv->irq_db_indices_bus);
432         priv->irq_db_indices = NULL;
433 abort_with_mgmt_vector:
434         free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
435 abort_with_msix_enabled:
436         pci_disable_msix(priv->pdev);
437 abort_with_msix_vectors:
438         kvfree(priv->msix_vectors);
439         priv->msix_vectors = NULL;
440         return err;
441 }
442
443 static void gve_free_notify_blocks(struct gve_priv *priv)
444 {
445         int i;
446
447         if (!priv->msix_vectors)
448                 return;
449
450         /* Free the irqs */
451         for (i = 0; i < priv->num_ntfy_blks; i++) {
452                 struct gve_notify_block *block = &priv->ntfy_blocks[i];
453                 int msix_idx = i;
454
455                 irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
456                                       NULL);
457                 free_irq(priv->msix_vectors[msix_idx].vector, block);
458         }
459         free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
460         kvfree(priv->ntfy_blocks);
461         priv->ntfy_blocks = NULL;
462         dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks *
463                           sizeof(*priv->irq_db_indices),
464                           priv->irq_db_indices, priv->irq_db_indices_bus);
465         priv->irq_db_indices = NULL;
466         pci_disable_msix(priv->pdev);
467         kvfree(priv->msix_vectors);
468         priv->msix_vectors = NULL;
469 }
470
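/* Allocate the event counter array, notify blocks and stats report, then
 * describe them to the device over the admin queue. DQO-RDA devices also
 * have their packet type lookup table fetched here.
 */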
471 static int gve_setup_device_resources(struct gve_priv *priv)
472 {
473         int err;
474
475         err = gve_alloc_counter_array(priv);
476         if (err)
477                 return err;
478         err = gve_alloc_notify_blocks(priv);
479         if (err)
480                 goto abort_with_counter;
481         err = gve_alloc_stats_report(priv);
482         if (err)
483                 goto abort_with_ntfy_blocks;
484         err = gve_adminq_configure_device_resources(priv,
485                                                     priv->counter_array_bus,
486                                                     priv->num_event_counters,
487                                                     priv->irq_db_indices_bus,
488                                                     priv->num_ntfy_blks);
489         if (unlikely(err)) {
490                 dev_err(&priv->pdev->dev,
491                         "could not setup device_resources: err=%d\n", err);
492                 err = -ENXIO;
493                 goto abort_with_stats_report;
494         }
495
496         if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
497                 priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
498                                                GFP_KERNEL);
499                 if (!priv->ptype_lut_dqo) {
500                         err = -ENOMEM;
501                         goto abort_with_stats_report;
502                 }
503                 err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
504                 if (err) {
505                         dev_err(&priv->pdev->dev,
506                                 "Failed to get ptype map: err=%d\n", err);
507                         goto abort_with_ptype_lut;
508                 }
509         }
510
511         err = gve_adminq_report_stats(priv, priv->stats_report_len,
512                                       priv->stats_report_bus,
513                                       GVE_STATS_REPORT_TIMER_PERIOD);
514         if (err)
515                 dev_err(&priv->pdev->dev,
516                         "Failed to report stats: err=%d\n", err);
517         gve_set_device_resources_ok(priv);
518         return 0;
519
520 abort_with_ptype_lut:
521         kvfree(priv->ptype_lut_dqo);
522         priv->ptype_lut_dqo = NULL;
523 abort_with_stats_report:
524         gve_free_stats_report(priv);
525 abort_with_ntfy_blocks:
526         gve_free_notify_blocks(priv);
527 abort_with_counter:
528         gve_free_counter_array(priv);
529
530         return err;
531 }
532
533 static void gve_trigger_reset(struct gve_priv *priv);
534
535 static void gve_teardown_device_resources(struct gve_priv *priv)
536 {
537         int err;
538
539         /* Tell device its resources are being freed */
540         if (gve_get_device_resources_ok(priv)) {
541                 /* detach the stats report */
542                 err = gve_adminq_report_stats(priv, 0, 0x0, GVE_STATS_REPORT_TIMER_PERIOD);
543                 if (err) {
544                         dev_err(&priv->pdev->dev,
545                                 "Failed to detach stats report: err=%d\n", err);
546                         gve_trigger_reset(priv);
547                 }
548                 err = gve_adminq_deconfigure_device_resources(priv);
549                 if (err) {
550                         dev_err(&priv->pdev->dev,
551                                 "Could not deconfigure device resources: err=%d\n",
552                                 err);
553                         gve_trigger_reset(priv);
554                 }
555         }
556
557         kvfree(priv->ptype_lut_dqo);
558         priv->ptype_lut_dqo = NULL;
559
560         gve_free_counter_array(priv);
561         gve_free_notify_blocks(priv);
562         gve_free_stats_report(priv);
563         gve_clear_device_resources_ok(priv);
564 }
565
566 static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
567                          int (*gve_poll)(struct napi_struct *, int))
568 {
569         struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
570
571         netif_napi_add(priv->dev, &block->napi, gve_poll);
572 }
573
574 static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
575 {
576         struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
577
578         netif_napi_del(&block->napi);
579 }
580
581 static int gve_register_xdp_qpls(struct gve_priv *priv)
582 {
583         int start_id;
584         int err;
585         int i;
586
587         start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
588         for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
589                 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
590                 if (err) {
591                         netif_err(priv, drv, priv->dev,
592                                   "failed to register queue page list %d\n",
593                                   priv->qpls[i].id);
594                         /* This failure will trigger a reset - no need to clean
595                          * up
596                          */
597                         return err;
598                 }
599         }
600         return 0;
601 }
602
603 static int gve_register_qpls(struct gve_priv *priv)
604 {
605         int start_id;
606         int err;
607         int i;
608
609         start_id = gve_tx_start_qpl_id(priv);
610         for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
611                 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
612                 if (err) {
613                         netif_err(priv, drv, priv->dev,
614                                   "failed to register queue page list %d\n",
615                                   priv->qpls[i].id);
616                         /* This failure will trigger a reset - no need to clean
617                          * up
618                          */
619                         return err;
620                 }
621         }
622
623         start_id = gve_rx_start_qpl_id(priv);
624         for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
625                 err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
626                 if (err) {
627                         netif_err(priv, drv, priv->dev,
628                                   "failed to register queue page list %d\n",
629                                   priv->qpls[i].id);
630                         /* This failure will trigger a reset - no need to clean
631                          * up
632                          */
633                         return err;
634                 }
635         }
636         return 0;
637 }
638
639 static int gve_unregister_xdp_qpls(struct gve_priv *priv)
640 {
641         int start_id;
642         int err;
643         int i;
644
645         start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
646         for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
647                 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
648                 /* This failure will trigger a reset - no need to clean up */
649                 if (err) {
650                         netif_err(priv, drv, priv->dev,
651                                   "Failed to unregister queue page list %d\n",
652                                   priv->qpls[i].id);
653                         return err;
654                 }
655         }
656         return 0;
657 }
658
659 static int gve_unregister_qpls(struct gve_priv *priv)
660 {
661         int start_id;
662         int err;
663         int i;
664
665         start_id = gve_tx_start_qpl_id(priv);
666         for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
667                 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
668                 /* This failure will trigger a reset - no need to clean up */
669                 if (err) {
670                         netif_err(priv, drv, priv->dev,
671                                   "Failed to unregister queue page list %d\n",
672                                   priv->qpls[i].id);
673                         return err;
674                 }
675         }
676
677         start_id = gve_rx_start_qpl_id(priv);
678         for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
679                 err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
680                 /* This failure will trigger a reset - no need to clean up */
681                 if (err) {
682                         netif_err(priv, drv, priv->dev,
683                                   "Failed to unregister queue page list %d\n",
684                                   priv->qpls[i].id);
685                         return err;
686                 }
687         }
688         return 0;
689 }
690
691 static int gve_create_xdp_rings(struct gve_priv *priv)
692 {
693         int err;
694
695         err = gve_adminq_create_tx_queues(priv,
696                                           gve_xdp_tx_start_queue_id(priv),
697                                           priv->num_xdp_queues);
698         if (err) {
699                 netif_err(priv, drv, priv->dev, "failed to create %d XDP tx queues\n",
700                           priv->num_xdp_queues);
701                 /* This failure will trigger a reset - no need to clean
702                  * up
703                  */
704                 return err;
705         }
706         netif_dbg(priv, drv, priv->dev, "created %d XDP tx queues\n",
707                   priv->num_xdp_queues);
708
709         return 0;
710 }
711
712 static int gve_create_rings(struct gve_priv *priv)
713 {
714         int num_tx_queues = gve_num_tx_queues(priv);
715         int err;
716         int i;
717
718         err = gve_adminq_create_tx_queues(priv, 0, num_tx_queues);
719         if (err) {
720                 netif_err(priv, drv, priv->dev, "failed to create %d tx queues\n",
721                           num_tx_queues);
722                 /* This failure will trigger a reset - no need to clean
723                  * up
724                  */
725                 return err;
726         }
727         netif_dbg(priv, drv, priv->dev, "created %d tx queues\n",
728                   num_tx_queues);
729
730         err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
731         if (err) {
732                 netif_err(priv, drv, priv->dev, "failed to create %d rx queues\n",
733                           priv->rx_cfg.num_queues);
734                 /* This failure will trigger a reset - no need to clean
735                  * up
736                  */
737                 return err;
738         }
739         netif_dbg(priv, drv, priv->dev, "created %d rx queues\n",
740                   priv->rx_cfg.num_queues);
741
742         if (gve_is_gqi(priv)) {
743                 /* Rx data ring has been prefilled with packet buffers at queue
744                  * allocation time.
745                  *
746                  * Write the doorbell to provide descriptor slots and packet
747                  * buffers to the NIC.
748                  */
749                 for (i = 0; i < priv->rx_cfg.num_queues; i++)
750                         gve_rx_write_doorbell(priv, &priv->rx[i]);
751         } else {
752                 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
753                         /* Post buffers and ring doorbell. */
754                         gve_rx_post_buffers_dqo(&priv->rx[i]);
755                 }
756         }
757
758         return 0;
759 }
760
761 static void add_napi_init_xdp_sync_stats(struct gve_priv *priv,
762                                          int (*napi_poll)(struct napi_struct *napi,
763                                                           int budget))
764 {
765         int start_id = gve_xdp_tx_start_queue_id(priv);
766         int i;
767
768         /* Add xdp tx napi & init sync stats */
769         for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
770                 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
771
772                 u64_stats_init(&priv->tx[i].statss);
773                 priv->tx[i].ntfy_id = ntfy_idx;
774                 gve_add_napi(priv, ntfy_idx, napi_poll);
775         }
776 }
777
778 static void add_napi_init_sync_stats(struct gve_priv *priv,
779                                      int (*napi_poll)(struct napi_struct *napi,
780                                                       int budget))
781 {
782         int i;
783
784         /* Add tx napi & init sync stats */
785         for (i = 0; i < gve_num_tx_queues(priv); i++) {
786                 int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
787
788                 u64_stats_init(&priv->tx[i].statss);
789                 priv->tx[i].ntfy_id = ntfy_idx;
790                 gve_add_napi(priv, ntfy_idx, napi_poll);
791         }
792         /* Add rx napi & init sync stats */
793         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
794                 int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
795
796                 u64_stats_init(&priv->rx[i].statss);
797                 priv->rx[i].ntfy_id = ntfy_idx;
798                 gve_add_napi(priv, ntfy_idx, napi_poll);
799         }
800 }
801
802 static void gve_tx_free_rings(struct gve_priv *priv, int start_id, int num_rings)
803 {
804         if (gve_is_gqi(priv)) {
805                 gve_tx_free_rings_gqi(priv, start_id, num_rings);
806         } else {
807                 gve_tx_free_rings_dqo(priv);
808         }
809 }
810
811 static int gve_alloc_xdp_rings(struct gve_priv *priv)
812 {
813         int start_id;
814         int err = 0;
815
816         if (!priv->num_xdp_queues)
817                 return 0;
818
819         start_id = gve_xdp_tx_start_queue_id(priv);
820         err = gve_tx_alloc_rings(priv, start_id, priv->num_xdp_queues);
821         if (err)
822                 return err;
823         add_napi_init_xdp_sync_stats(priv, gve_napi_poll);
824
825         return 0;
826 }
827
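/* Allocate the TX and RX ring arrays for the configured queue counts and
 * register the matching NAPI poll handler (GQI or DQO) on every ring's
 * notify block.
 */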
828 static int gve_alloc_rings(struct gve_priv *priv)
829 {
830         int err;
831
832         /* Setup tx rings */
833         priv->tx = kvcalloc(priv->tx_cfg.max_queues, sizeof(*priv->tx),
834                             GFP_KERNEL);
835         if (!priv->tx)
836                 return -ENOMEM;
837
838         if (gve_is_gqi(priv))
839                 err = gve_tx_alloc_rings(priv, 0, gve_num_tx_queues(priv));
840         else
841                 err = gve_tx_alloc_rings_dqo(priv);
842         if (err)
843                 goto free_tx;
844
845         /* Setup rx rings */
846         priv->rx = kvcalloc(priv->rx_cfg.max_queues, sizeof(*priv->rx),
847                             GFP_KERNEL);
848         if (!priv->rx) {
849                 err = -ENOMEM;
850                 goto free_tx_queue;
851         }
852
853         if (gve_is_gqi(priv))
854                 err = gve_rx_alloc_rings(priv);
855         else
856                 err = gve_rx_alloc_rings_dqo(priv);
857         if (err)
858                 goto free_rx;
859
860         if (gve_is_gqi(priv))
861                 add_napi_init_sync_stats(priv, gve_napi_poll);
862         else
863                 add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
864
865         return 0;
866
867 free_rx:
868         kvfree(priv->rx);
869         priv->rx = NULL;
870 free_tx_queue:
871         gve_tx_free_rings(priv, 0, gve_num_tx_queues(priv));
872 free_tx:
873         kvfree(priv->tx);
874         priv->tx = NULL;
875         return err;
876 }
877
878 static int gve_destroy_xdp_rings(struct gve_priv *priv)
879 {
880         int start_id;
881         int err;
882
883         start_id = gve_xdp_tx_start_queue_id(priv);
884         err = gve_adminq_destroy_tx_queues(priv,
885                                            start_id,
886                                            priv->num_xdp_queues);
887         if (err) {
888                 netif_err(priv, drv, priv->dev,
889                           "failed to destroy XDP queues\n");
890                 /* This failure will trigger a reset - no need to clean up */
891                 return err;
892         }
893         netif_dbg(priv, drv, priv->dev, "destroyed XDP queues\n");
894
895         return 0;
896 }
897
898 static int gve_destroy_rings(struct gve_priv *priv)
899 {
900         int num_tx_queues = gve_num_tx_queues(priv);
901         int err;
902
903         err = gve_adminq_destroy_tx_queues(priv, 0, num_tx_queues);
904         if (err) {
905                 netif_err(priv, drv, priv->dev,
906                           "failed to destroy tx queues\n");
907                 /* This failure will trigger a reset - no need to clean up */
908                 return err;
909         }
910         netif_dbg(priv, drv, priv->dev, "destroyed tx queues\n");
911         err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
912         if (err) {
913                 netif_err(priv, drv, priv->dev,
914                           "failed to destroy rx queues\n");
915                 /* This failure will trigger a reset - no need to clean up */
916                 return err;
917         }
918         netif_dbg(priv, drv, priv->dev, "destroyed rx queues\n");
919         return 0;
920 }
921
922 static void gve_rx_free_rings(struct gve_priv *priv)
923 {
924         if (gve_is_gqi(priv))
925                 gve_rx_free_rings_gqi(priv);
926         else
927                 gve_rx_free_rings_dqo(priv);
928 }
929
930 static void gve_free_xdp_rings(struct gve_priv *priv)
931 {
932         int ntfy_idx, start_id;
933         int i;
934
935         start_id = gve_xdp_tx_start_queue_id(priv);
936         if (priv->tx) {
937                 for (i = start_id; i < start_id + priv->num_xdp_queues; i++) {
938                         ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
939                         gve_remove_napi(priv, ntfy_idx);
940                 }
941                 gve_tx_free_rings(priv, start_id, priv->num_xdp_queues);
942         }
943 }
944
945 static void gve_free_rings(struct gve_priv *priv)
946 {
947         int num_tx_queues = gve_num_tx_queues(priv);
948         int ntfy_idx;
949         int i;
950
951         if (priv->tx) {
952                 for (i = 0; i < num_tx_queues; i++) {
953                         ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
954                         gve_remove_napi(priv, ntfy_idx);
955                 }
956                 gve_tx_free_rings(priv, 0, num_tx_queues);
957                 kvfree(priv->tx);
958                 priv->tx = NULL;
959         }
960         if (priv->rx) {
961                 for (i = 0; i < priv->rx_cfg.num_queues; i++) {
962                         ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
963                         gve_remove_napi(priv, ntfy_idx);
964                 }
965                 gve_rx_free_rings(priv);
966                 kvfree(priv->rx);
967                 priv->rx = NULL;
968         }
969 }
970
971 int gve_alloc_page(struct gve_priv *priv, struct device *dev,
972                    struct page **page, dma_addr_t *dma,
973                    enum dma_data_direction dir, gfp_t gfp_flags)
974 {
975         *page = alloc_page(gfp_flags);
976         if (!*page) {
977                 priv->page_alloc_fail++;
978                 return -ENOMEM;
979         }
980         *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
981         if (dma_mapping_error(dev, *dma)) {
982                 priv->dma_mapping_error++;
983                 put_page(*page);
984                 return -ENOMEM;
985         }
986         return 0;
987 }
988
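/* Allocate and DMA-map the pages backing one queue page list (QPL): a fixed
 * array of pages that is later registered with the device. QPLs are only
 * used with the GQI_QPL queue format (see gve_alloc_qpls()).
 */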
989 static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
990                                      int pages)
991 {
992         struct gve_queue_page_list *qpl = &priv->qpls[id];
993         int err;
994         int i;
995
996         if (pages + priv->num_registered_pages > priv->max_registered_pages) {
997                 netif_err(priv, drv, priv->dev,
998                           "Reached max number of registered pages %llu > %llu\n",
999                           pages + priv->num_registered_pages,
1000                           priv->max_registered_pages);
1001                 return -EINVAL;
1002         }
1003
1004         qpl->id = id;
1005         qpl->num_entries = 0;
1006         qpl->pages = kvcalloc(pages, sizeof(*qpl->pages), GFP_KERNEL);
1007         /* caller handles clean up */
1008         if (!qpl->pages)
1009                 return -ENOMEM;
1010         qpl->page_buses = kvcalloc(pages, sizeof(*qpl->page_buses), GFP_KERNEL);
1011         /* caller handles clean up */
1012         if (!qpl->page_buses)
1013                 return -ENOMEM;
1014
1015         for (i = 0; i < pages; i++) {
1016                 err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i],
1017                                      &qpl->page_buses[i],
1018                                      gve_qpl_dma_dir(priv, id), GFP_KERNEL);
1019                 /* caller handles clean up */
1020                 if (err)
1021                         return -ENOMEM;
1022                 qpl->num_entries++;
1023         }
1024         priv->num_registered_pages += pages;
1025
1026         return 0;
1027 }
1028
1029 void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
1030                    enum dma_data_direction dir)
1031 {
1032         if (!dma_mapping_error(dev, dma))
1033                 dma_unmap_page(dev, dma, PAGE_SIZE, dir);
1034         if (page)
1035                 put_page(page);
1036 }
1037
1038 static void gve_free_queue_page_list(struct gve_priv *priv, u32 id)
1039 {
1040         struct gve_queue_page_list *qpl = &priv->qpls[id];
1041         int i;
1042
1043         if (!qpl->pages)
1044                 return;
1045         if (!qpl->page_buses)
1046                 goto free_pages;
1047
1048         for (i = 0; i < qpl->num_entries; i++)
1049                 gve_free_page(&priv->pdev->dev, qpl->pages[i],
1050                               qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
1051
1052         kvfree(qpl->page_buses);
1053         qpl->page_buses = NULL;
1054 free_pages:
1055         kvfree(qpl->pages);
1056         qpl->pages = NULL;
1057         priv->num_registered_pages -= qpl->num_entries;
1058 }
1059
1060 static int gve_alloc_xdp_qpls(struct gve_priv *priv)
1061 {
1062         int start_id;
1063         int i, j;
1064         int err;
1065
1066         start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1067         for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++) {
1068                 err = gve_alloc_queue_page_list(priv, i,
1069                                                 priv->tx_pages_per_qpl);
1070                 if (err)
1071                         goto free_qpls;
1072         }
1073
1074         return 0;
1075
1076 free_qpls:
1077         for (j = start_id; j <= i; j++)
1078                 gve_free_queue_page_list(priv, j);
1079         return err;
1080 }
1081
1082 static int gve_alloc_qpls(struct gve_priv *priv)
1083 {
1084         int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1085         int start_id;
1086         int i, j;
1087         int err;
1088
1089         if (priv->queue_format != GVE_GQI_QPL_FORMAT)
1090                 return 0;
1091
1092         priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
1093         if (!priv->qpls)
1094                 return -ENOMEM;
1095
1096         start_id = gve_tx_start_qpl_id(priv);
1097         for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
1098                 err = gve_alloc_queue_page_list(priv, i,
1099                                                 priv->tx_pages_per_qpl);
1100                 if (err)
1101                         goto free_qpls;
1102         }
1103
1104         start_id = gve_rx_start_qpl_id(priv);
1105         for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
1106                 err = gve_alloc_queue_page_list(priv, i,
1107                                                 priv->rx_data_slot_cnt);
1108                 if (err)
1109                         goto free_qpls;
1110         }
1111
1112         priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(max_queues) *
1113                                      sizeof(unsigned long) * BITS_PER_BYTE;
1114         priv->qpl_cfg.qpl_id_map = kvcalloc(BITS_TO_LONGS(max_queues),
1115                                             sizeof(unsigned long), GFP_KERNEL);
1116         if (!priv->qpl_cfg.qpl_id_map) {
1117                 err = -ENOMEM;
1118                 goto free_qpls;
1119         }
1120
1121         return 0;
1122
1123 free_qpls:
1124         for (j = 0; j <= i; j++)
1125                 gve_free_queue_page_list(priv, j);
1126         kvfree(priv->qpls);
1127         priv->qpls = NULL;
1128         return err;
1129 }
1130
1131 static void gve_free_xdp_qpls(struct gve_priv *priv)
1132 {
1133         int start_id;
1134         int i;
1135
1136         start_id = gve_tx_qpl_id(priv, gve_xdp_tx_start_queue_id(priv));
1137         for (i = start_id; i < start_id + gve_num_xdp_qpls(priv); i++)
1138                 gve_free_queue_page_list(priv, i);
1139 }
1140
1141 static void gve_free_qpls(struct gve_priv *priv)
1142 {
1143         int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
1144         int i;
1145
1146         if (!priv->qpls)
1147                 return;
1148
1149         kvfree(priv->qpl_cfg.qpl_id_map);
1150         priv->qpl_cfg.qpl_id_map = NULL;
1151
1152         for (i = 0; i < max_queues; i++)
1153                 gve_free_queue_page_list(priv, i);
1154
1155         kvfree(priv->qpls);
1156         priv->qpls = NULL;
1157 }
1158
1159 /* Use this to schedule a reset when the device is capable of continuing
1160  * to handle other requests in its current state. If it is not, do a reset
1161  * in thread instead.
1162  */
1163 void gve_schedule_reset(struct gve_priv *priv)
1164 {
1165         gve_set_do_reset(priv);
1166         queue_work(priv->gve_wq, &priv->service_task);
1167 }
1168
1169 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up);
1170 static int gve_reset_recovery(struct gve_priv *priv, bool was_up);
1171 static void gve_turndown(struct gve_priv *priv);
1172 static void gve_turnup(struct gve_priv *priv);
1173
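/* Register XDP RX queue info and the page-shared memory model for every RX
 * ring. When an XSK buffer pool is bound to a queue, also register a
 * separate xsk_rxq backed by that pool and point the paired XDP TX queue at
 * it.
 */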
1174 static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev)
1175 {
1176         struct napi_struct *napi;
1177         struct gve_rx_ring *rx;
1178         int err = 0;
1179         int i, j;
1180         u32 tx_qid;
1181
1182         if (!priv->num_xdp_queues)
1183                 return 0;
1184
1185         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1186                 rx = &priv->rx[i];
1187                 napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1188
1189                 err = xdp_rxq_info_reg(&rx->xdp_rxq, dev, i,
1190                                        napi->napi_id);
1191                 if (err)
1192                         goto err;
1193                 err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq,
1194                                                  MEM_TYPE_PAGE_SHARED, NULL);
1195                 if (err)
1196                         goto err;
1197                 rx->xsk_pool = xsk_get_pool_from_qid(dev, i);
1198                 if (rx->xsk_pool) {
1199                         err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i,
1200                                                napi->napi_id);
1201                         if (err)
1202                                 goto err;
1203                         err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1204                                                          MEM_TYPE_XSK_BUFF_POOL, NULL);
1205                         if (err)
1206                                 goto err;
1207                         xsk_pool_set_rxq_info(rx->xsk_pool,
1208                                               &rx->xsk_rxq);
1209                 }
1210         }
1211
1212         for (i = 0; i < priv->num_xdp_queues; i++) {
1213                 tx_qid = gve_xdp_tx_queue_id(priv, i);
1214                 priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i);
1215         }
1216         return 0;
1217
1218 err:
1219         for (j = i; j >= 0; j--) {
1220                 rx = &priv->rx[j];
1221                 if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
1222                         xdp_rxq_info_unreg(&rx->xdp_rxq);
1223                 if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1224                         xdp_rxq_info_unreg(&rx->xsk_rxq);
1225         }
1226         return err;
1227 }
1228
1229 static void gve_unreg_xdp_info(struct gve_priv *priv)
1230 {
1231         int i, tx_qid;
1232
1233         if (!priv->num_xdp_queues)
1234                 return;
1235
1236         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1237                 struct gve_rx_ring *rx = &priv->rx[i];
1238
1239                 xdp_rxq_info_unreg(&rx->xdp_rxq);
1240                 if (rx->xsk_pool) {
1241                         xdp_rxq_info_unreg(&rx->xsk_rxq);
1242                         rx->xsk_pool = NULL;
1243                 }
1244         }
1245
1246         for (i = 0; i < priv->num_xdp_queues; i++) {
1247                 tx_qid = gve_xdp_tx_queue_id(priv, i);
1248                 priv->tx[tx_qid].xsk_pool = NULL;
1249         }
1250 }
1251
1252 static void gve_drain_page_cache(struct gve_priv *priv)
1253 {
1254         struct page_frag_cache *nc;
1255         int i;
1256
1257         for (i = 0; i < priv->rx_cfg.num_queues; i++) {
1258                 nc = &priv->rx[i].page_cache;
1259                 if (nc->va) {
1260                         __page_frag_cache_drain(virt_to_page(nc->va),
1261                                                 nc->pagecnt_bias);
1262                         nc->va = NULL;
1263                 }
1264         }
1265 }
1266
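/* ndo_open: allocate QPLs and rings, set the real TX/RX queue counts,
 * register XDP info and QPLs with the device, create the device queues and
 * bring the data path up. Failures after device state has been touched fall
 * through to a reset rather than a plain unwind.
 */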
1267 static int gve_open(struct net_device *dev)
1268 {
1269         struct gve_priv *priv = netdev_priv(dev);
1270         int err;
1271
1272         if (priv->xdp_prog)
1273                 priv->num_xdp_queues = priv->rx_cfg.num_queues;
1274         else
1275                 priv->num_xdp_queues = 0;
1276
1277         err = gve_alloc_qpls(priv);
1278         if (err)
1279                 return err;
1280
1281         err = gve_alloc_rings(priv);
1282         if (err)
1283                 goto free_qpls;
1284
1285         err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues);
1286         if (err)
1287                 goto free_rings;
1288         err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues);
1289         if (err)
1290                 goto free_rings;
1291
1292         err = gve_reg_xdp_info(priv, dev);
1293         if (err)
1294                 goto free_rings;
1295
1296         err = gve_register_qpls(priv);
1297         if (err)
1298                 goto reset;
1299
1300         if (!gve_is_gqi(priv)) {
1301                 /* Hard code this for now. This may be tuned in the future for
1302                  * performance.
1303                  */
1304                 priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
1305         }
1306         err = gve_create_rings(priv);
1307         if (err)
1308                 goto reset;
1309
1310         gve_set_device_rings_ok(priv);
1311
1312         if (gve_get_report_stats(priv))
1313                 mod_timer(&priv->stats_report_timer,
1314                           round_jiffies(jiffies +
1315                                 msecs_to_jiffies(priv->stats_report_timer_period)));
1316
1317         gve_turnup(priv);
1318         queue_work(priv->gve_wq, &priv->service_task);
1319         priv->interface_up_cnt++;
1320         return 0;
1321
1322 free_rings:
1323         gve_free_rings(priv);
1324 free_qpls:
1325         gve_free_qpls(priv);
1326         return err;
1327
1328 reset:
1329         /* This must have been called from a reset due to the rtnl lock
1330          * so just return at this point.
1331          */
1332         if (gve_get_reset_in_progress(priv))
1333                 return err;
1334         /* Otherwise reset before returning */
1335         gve_reset_and_teardown(priv, true);
1336         /* if this fails there is nothing we can do so just ignore the return */
1337         gve_reset_recovery(priv, false);
1338         /* return the original error */
1339         return err;
1340 }
1341
1342 static int gve_close(struct net_device *dev)
1343 {
1344         struct gve_priv *priv = netdev_priv(dev);
1345         int err;
1346
1347         netif_carrier_off(dev);
1348         if (gve_get_device_rings_ok(priv)) {
1349                 gve_turndown(priv);
1350                 gve_drain_page_cache(priv);
1351                 err = gve_destroy_rings(priv);
1352                 if (err)
1353                         goto err;
1354                 err = gve_unregister_qpls(priv);
1355                 if (err)
1356                         goto err;
1357                 gve_clear_device_rings_ok(priv);
1358         }
1359         del_timer_sync(&priv->stats_report_timer);
1360
1361         gve_unreg_xdp_info(priv);
1362         gve_free_rings(priv);
1363         gve_free_qpls(priv);
1364         priv->interface_down_cnt++;
1365         return 0;
1366
1367 err:
1368         /* This must have been called from a reset due to the rtnl lock
1369          * so just return at this point.
1370          */
1371         if (gve_get_reset_in_progress(priv))
1372                 return err;
1373         /* Otherwise reset before returning */
1374         gve_reset_and_teardown(priv, true);
1375         return gve_reset_recovery(priv, false);
1376 }
1377
1378 static int gve_remove_xdp_queues(struct gve_priv *priv)
1379 {
1380         int err;
1381
1382         err = gve_destroy_xdp_rings(priv);
1383         if (err)
1384                 return err;
1385
1386         err = gve_unregister_xdp_qpls(priv);
1387         if (err)
1388                 return err;
1389
1390         gve_unreg_xdp_info(priv);
1391         gve_free_xdp_rings(priv);
1392         gve_free_xdp_qpls(priv);
1393         priv->num_xdp_queues = 0;
1394         return 0;
1395 }
1396
1397 static int gve_add_xdp_queues(struct gve_priv *priv)
1398 {
1399         int err;
1400
1401         priv->num_xdp_queues = priv->tx_cfg.num_queues;
1402
1403         err = gve_alloc_xdp_qpls(priv);
1404         if (err)
1405                 goto err;
1406
1407         err = gve_alloc_xdp_rings(priv);
1408         if (err)
1409                 goto free_xdp_qpls;
1410
1411         err = gve_reg_xdp_info(priv, priv->dev);
1412         if (err)
1413                 goto free_xdp_rings;
1414
1415         err = gve_register_xdp_qpls(priv);
1416         if (err)
1417                 goto free_xdp_rings;
1418
1419         err = gve_create_xdp_rings(priv);
1420         if (err)
1421                 goto free_xdp_rings;
1422
1423         return 0;
1424
1425 free_xdp_rings:
1426         gve_free_xdp_rings(priv);
1427 free_xdp_qpls:
1428         gve_free_xdp_qpls(priv);
1429 err:
1430         priv->num_xdp_queues = 0;
1431         return err;
1432 }
1433
1434 static void gve_handle_link_status(struct gve_priv *priv, bool link_status)
1435 {
1436         if (!gve_get_napi_enabled(priv))
1437                 return;
1438
1439         if (link_status == netif_carrier_ok(priv->dev))
1440                 return;
1441
1442         if (link_status) {
1443                 netdev_info(priv->dev, "Device link is up.\n");
1444                 netif_carrier_on(priv->dev);
1445         } else {
1446                 netdev_info(priv->dev, "Device link is down.\n");
1447                 netif_carrier_off(priv->dev);
1448         }
1449 }
1450
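/* Install or remove an XDP program. With the carrier down only the program
 * pointer is swapped; otherwise the data path is turned down, the dedicated
 * XDP TX queues are added or removed to match, and the device is turned
 * back up.
 */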
1451 static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog,
1452                        struct netlink_ext_ack *extack)
1453 {
1454         struct bpf_prog *old_prog;
1455         int err = 0;
1456         u32 status;
1457
1458         old_prog = READ_ONCE(priv->xdp_prog);
1459         if (!netif_carrier_ok(priv->dev)) {
1460                 WRITE_ONCE(priv->xdp_prog, prog);
1461                 if (old_prog)
1462                         bpf_prog_put(old_prog);
1463                 return 0;
1464         }
1465
1466         gve_turndown(priv);
1467         if (!old_prog && prog) {
1468                 // Allocate XDP TX queues if an XDP program is
1469                 // being installed
1470                 err = gve_add_xdp_queues(priv);
1471                 if (err)
1472                         goto out;
1473         } else if (old_prog && !prog) {
1474                 // Remove XDP TX queues if an XDP program is
1475                 // being uninstalled
1476                 err = gve_remove_xdp_queues(priv);
1477                 if (err)
1478                         goto out;
1479         }
1480         WRITE_ONCE(priv->xdp_prog, prog);
1481         if (old_prog)
1482                 bpf_prog_put(old_prog);
1483
1484 out:
1485         gve_turnup(priv);
1486         status = ioread32be(&priv->reg_bar0->device_status);
1487         gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1488         return err;
1489 }
1490
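     /* Attach an AF_XDP buffer pool to RX queue @qid and its paired XDP TX
      * queue. The qid and frame size are validated and the pool DMA-mapped;
      * the XSK rxq info is only registered once an XDP program is installed.
      */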
1491 static int gve_xsk_pool_enable(struct net_device *dev,
1492                                struct xsk_buff_pool *pool,
1493                                u16 qid)
1494 {
1495         struct gve_priv *priv = netdev_priv(dev);
1496         struct napi_struct *napi;
1497         struct gve_rx_ring *rx;
1498         int tx_qid;
1499         int err;
1500
1501         if (qid >= priv->rx_cfg.num_queues) {
1502                 dev_err(&priv->pdev->dev, "xsk pool invalid qid %d", qid);
1503                 return -EINVAL;
1504         }
1505         if (xsk_pool_get_rx_frame_size(pool) <
1506              priv->dev->max_mtu + sizeof(struct ethhdr)) {
1507                 dev_err(&priv->pdev->dev, "xsk pool frame_len too small");
1508                 return -EINVAL;
1509         }
1510
1511         err = xsk_pool_dma_map(pool, &priv->pdev->dev,
1512                                DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1513         if (err)
1514                 return err;
1515
1516         /* If XDP prog is not installed, return */
1517         if (!priv->xdp_prog)
1518                 return 0;
1519
1520         rx = &priv->rx[qid];
1521         napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
1522         err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id);
1523         if (err)
1524                 goto err;
1525
1526         err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq,
1527                                          MEM_TYPE_XSK_BUFF_POOL, NULL);
1528         if (err)
1529                 goto err;
1530
1531         xsk_pool_set_rxq_info(pool, &rx->xsk_rxq);
1532         rx->xsk_pool = pool;
1533
1534         tx_qid = gve_xdp_tx_queue_id(priv, qid);
1535         priv->tx[tx_qid].xsk_pool = pool;
1536
1537         return 0;
1538 err:
1539         if (xdp_rxq_info_is_reg(&rx->xsk_rxq))
1540                 xdp_rxq_info_unreg(&rx->xsk_rxq);
1541
1542         xsk_pool_dma_unmap(pool,
1543                            DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1544         return err;
1545 }
1546
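     /* Detach the AF_XDP buffer pool from RX queue @qid. If the queues are
      * live, the RX and paired TX NAPI are briefly disabled so in-flight
      * polls finish before the pool pointers are cleared, then the pool is
      * DMA-unmapped.
      */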
1547 static int gve_xsk_pool_disable(struct net_device *dev,
1548                                 u16 qid)
1549 {
1550         struct gve_priv *priv = netdev_priv(dev);
1551         struct napi_struct *napi_rx;
1552         struct napi_struct *napi_tx;
1553         struct xsk_buff_pool *pool;
1554         int tx_qid;
1555
1556         pool = xsk_get_pool_from_qid(dev, qid);
1557         if (!pool)
1558                 return -EINVAL;
1559         if (qid >= priv->rx_cfg.num_queues)
1560                 return -EINVAL;
1561
1562         /* If XDP prog is not installed, unmap DMA and return */
1563         if (!priv->xdp_prog)
1564                 goto done;
1565
1566         tx_qid = gve_xdp_tx_queue_id(priv, qid);
1567         if (!netif_running(dev)) {
1568                 priv->rx[qid].xsk_pool = NULL;
1569                 xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1570                 priv->tx[tx_qid].xsk_pool = NULL;
1571                 goto done;
1572         }
1573
1574         napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi;
1575         napi_disable(napi_rx); /* make sure current rx poll is done */
1576
1577         napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi;
1578         napi_disable(napi_tx); /* make sure current tx poll is done */
1579
1580         priv->rx[qid].xsk_pool = NULL;
1581         xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq);
1582         priv->tx[tx_qid].xsk_pool = NULL;
1583         smp_mb(); /* Make sure it is visible to the workers on datapath */
1584
1585         napi_enable(napi_rx);
1586         if (gve_rx_work_pending(&priv->rx[qid]))
1587                 napi_schedule(napi_rx);
1588
1589         napi_enable(napi_tx);
1590         if (gve_tx_clean_pending(priv, &priv->tx[tx_qid]))
1591                 napi_schedule(napi_tx);
1592
1593 done:
1594         xsk_pool_dma_unmap(pool,
1595                            DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
1596         return 0;
1597 }
1598
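     /* AF_XDP wakeup hook: for TX wakeups, schedule NAPI on the paired XDP
      * TX queue unless it is already scheduled, in which case the missed
      * flag ensures the poll loop runs again.
      */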
1599 static int gve_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
1600 {
1601         struct gve_priv *priv = netdev_priv(dev);
1602         int tx_queue_id = gve_xdp_tx_queue_id(priv, queue_id);
1603
1604         if (queue_id >= priv->rx_cfg.num_queues || !priv->xdp_prog)
1605                 return -EINVAL;
1606
1607         if (flags & XDP_WAKEUP_TX) {
1608                 struct gve_tx_ring *tx = &priv->tx[tx_queue_id];
1609                 struct napi_struct *napi =
1610                         &priv->ntfy_blocks[tx->ntfy_id].napi;
1611
1612                 if (!napi_if_scheduled_mark_missed(napi)) {
1613                         /* Call local_bh_enable to trigger SoftIRQ processing */
1614                         local_bh_disable();
1615                         napi_schedule(napi);
1616                         local_bh_enable();
1617                 }
1618
1619                 tx->xdp_xsk_wakeup++;
1620         }
1621
1622         return 0;
1623 }
1624
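     /* XDP can only be enabled with LRO off, in GQI-QPL queue format, with
      * an MTU that fits a half-page RX buffer, and with equal RX/TX queue
      * counts no larger than half the maximum so XDP TX queues can be added.
      */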
1625 static int verify_xdp_configuration(struct net_device *dev)
1626 {
1627         struct gve_priv *priv = netdev_priv(dev);
1628
1629         if (dev->features & NETIF_F_LRO) {
1630                 netdev_warn(dev, "XDP is not supported when LRO is on.\n");
1631                 return -EOPNOTSUPP;
1632         }
1633
1634         if (priv->queue_format != GVE_GQI_QPL_FORMAT) {
1635                 netdev_warn(dev, "XDP is not supported in mode %d.\n",
1636                             priv->queue_format);
1637                 return -EOPNOTSUPP;
1638         }
1639
1640         if (dev->mtu > (PAGE_SIZE / 2) - sizeof(struct ethhdr) - GVE_RX_PAD) {
1641                 netdev_warn(dev, "XDP is not supported for mtu %d.\n",
1642                             dev->mtu);
1643                 return -EOPNOTSUPP;
1644         }
1645
1646         if (priv->rx_cfg.num_queues != priv->tx_cfg.num_queues ||
1647             (2 * priv->tx_cfg.num_queues > priv->tx_cfg.max_queues)) {
1648                 netdev_warn(dev, "XDP load failed: The number of configured RX queues %d should be equal to the number of configured TX queues %d and the number of configured RX/TX queues should be less than or equal to half the maximum number of RX/TX queues %d",
1649                             priv->rx_cfg.num_queues,
1650                             priv->tx_cfg.num_queues,
1651                             priv->tx_cfg.max_queues);
1652                 return -EINVAL;
1653         }
1654         return 0;
1655 }
1656
1657 static int gve_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1658 {
1659         struct gve_priv *priv = netdev_priv(dev);
1660         int err;
1661
1662         err = verify_xdp_configuration(dev);
1663         if (err)
1664                 return err;
1665         switch (xdp->command) {
1666         case XDP_SETUP_PROG:
1667                 return gve_set_xdp(priv, xdp->prog, xdp->extack);
1668         case XDP_SETUP_XSK_POOL:
1669                 if (xdp->xsk.pool)
1670                         return gve_xsk_pool_enable(dev, xdp->xsk.pool, xdp->xsk.queue_id);
1671                 else
1672                         return gve_xsk_pool_disable(dev, xdp->xsk.queue_id);
1673         default:
1674                 return -EINVAL;
1675         }
1676 }
1677
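     /* Apply a new TX/RX queue configuration. If the device is up it is
      * closed, reconfigured and reopened; otherwise the new configuration is
      * simply stored for the next open.
      */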
1678 int gve_adjust_queues(struct gve_priv *priv,
1679                       struct gve_queue_config new_rx_config,
1680                       struct gve_queue_config new_tx_config)
1681 {
1682         int err;
1683
1684         if (netif_carrier_ok(priv->dev)) {
1685                 /* To make this process as simple as possible we teardown the
1686                  * device, set the new configuration, and then bring the device
1687                  * up again.
1688                  */
1689                 err = gve_close(priv->dev);
1690                 /* We have already tried to reset in close, just fail
1691                  * at this point.
1692                  */
1693                 if (err)
1694                         return err;
1695                 priv->tx_cfg = new_tx_config;
1696                 priv->rx_cfg = new_rx_config;
1697
1698                 err = gve_open(priv->dev);
1699                 if (err)
1700                         goto err;
1701
1702                 return 0;
1703         }
1704         /* Set the config for the next up. */
1705         priv->tx_cfg = new_tx_config;
1706         priv->rx_cfg = new_rx_config;
1707
1708         return 0;
1709 err:
1710         netif_err(priv, drv, priv->dev,
1711                   "Adjust queues failed! !!! DISABLING ALL QUEUES !!!\n");
1712         gve_turndown(priv);
1713         return err;
1714 }
1715
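     /* Quiesce the data path: drop the carrier, disable every TX and RX NAPI
      * instance, stop the TX queues and clear the napi-enabled and
      * report-stats flags.
      */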
1716 static void gve_turndown(struct gve_priv *priv)
1717 {
1718         int idx;
1719
1720         if (netif_carrier_ok(priv->dev))
1721                 netif_carrier_off(priv->dev);
1722
1723         if (!gve_get_napi_enabled(priv))
1724                 return;
1725
1726         /* Disable napi to prevent more work from coming in */
1727         for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1728                 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1729                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1730
1731                 napi_disable(&block->napi);
1732         }
1733         for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1734                 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1735                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1736
1737                 napi_disable(&block->napi);
1738         }
1739
1740         /* Stop tx queues */
1741         netif_tx_disable(priv->dev);
1742
1743         gve_clear_napi_enabled(priv);
1744         gve_clear_report_stats(priv);
1745 }
1746
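     /* Restart the data path: start the TX queues, re-enable every NAPI
      * instance and re-arm its interrupt (IRQ doorbell write on GQI, ITR
      * programming on DQO), then mark NAPI as enabled.
      */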
1747 static void gve_turnup(struct gve_priv *priv)
1748 {
1749         int idx;
1750
1751         /* Start the tx queues */
1752         netif_tx_start_all_queues(priv->dev);
1753
1754         /* Enable napi and unmask interrupts for all queues */
1755         for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1756                 int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
1757                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1758
1759                 napi_enable(&block->napi);
1760                 if (gve_is_gqi(priv)) {
1761                         iowrite32be(0, gve_irq_doorbell(priv, block));
1762                 } else {
1763                         gve_set_itr_coalesce_usecs_dqo(priv, block,
1764                                                        priv->tx_coalesce_usecs);
1765                 }
1766         }
1767         for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1768                 int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
1769                 struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
1770
1771                 napi_enable(&block->napi);
1772                 if (gve_is_gqi(priv)) {
1773                         iowrite32be(0, gve_irq_doorbell(priv, block));
1774                 } else {
1775                         gve_set_itr_coalesce_usecs_dqo(priv, block,
1776                                                        priv->rx_coalesce_usecs);
1777                 }
1778         }
1779
1780         gve_set_napi_enabled(priv);
1781 }
1782
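     /* TX watchdog. If the queue has not been kicked recently and the NIC
      * event counter shows completions the driver has not processed, mask
      * the IRQ and schedule NAPI to catch up; otherwise schedule a reset.
      */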
1783 static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
1784 {
1785         struct gve_notify_block *block;
1786         struct gve_tx_ring *tx = NULL;
1787         struct gve_priv *priv;
1788         u32 last_nic_done;
1789         u32 current_time;
1790         u32 ntfy_idx;
1791
1792         netdev_info(dev, "Timeout on tx queue %d", txqueue);
1793         priv = netdev_priv(dev);
1794         if (txqueue >= priv->tx_cfg.num_queues)
1795                 goto reset;
1796
1797         ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
1798         if (ntfy_idx >= priv->num_ntfy_blks)
1799                 goto reset;
1800
1801         block = &priv->ntfy_blocks[ntfy_idx];
1802         tx = block->tx;
1803
1804         current_time = jiffies_to_msecs(jiffies);
1805         if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
1806                 goto reset;
1807
1808         /* Check to see if there are missed completions, which will allow us to
1809          * kick the queue.
1810          */
1811         last_nic_done = gve_tx_load_event_counter(priv, tx);
1812         if (last_nic_done - tx->done) {
1813                 netdev_info(dev, "Kicking queue %d", txqueue);
1814                 iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
1815                 napi_schedule(&block->napi);
1816                 tx->last_kick_msec = current_time;
1817                 goto out;
1818         } /* Else reset. */
1819
1820 reset:
1821         gve_schedule_reset(priv);
1822
1823 out:
1824         if (tx)
1825                 tx->queue_timeout++;
1826         priv->tx_timeo_cnt++;
1827 }
1828
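     /* Toggling LRO requires a full close/open cycle while the device is up;
      * on failure the feature change is reverted and the error returned.
      */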
1829 static int gve_set_features(struct net_device *netdev,
1830                             netdev_features_t features)
1831 {
1832         const netdev_features_t orig_features = netdev->features;
1833         struct gve_priv *priv = netdev_priv(netdev);
1834         int err;
1835
1836         if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
1837                 netdev->features ^= NETIF_F_LRO;
1838                 if (netif_carrier_ok(netdev)) {
1839                         /* To make this process as simple as possible we
1840                          * teardown the device, set the new configuration,
1841                          * and then bring the device up again.
1842                          */
1843                         err = gve_close(netdev);
1844                         /* We have already tried to reset in close, just fail
1845                          * at this point.
1846                          */
1847                         if (err)
1848                                 goto err;
1849
1850                         err = gve_open(netdev);
1851                         if (err)
1852                                 goto err;
1853                 }
1854         }
1855
1856         return 0;
1857 err:
1858         /* Reverts the change on error. */
1859         netdev->features = orig_features;
1860         netif_err(priv, drv, netdev,
1861                   "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
1862         return err;
1863 }
1864
1865 static const struct net_device_ops gve_netdev_ops = {
1866         .ndo_start_xmit         =       gve_start_xmit,
1867         .ndo_open               =       gve_open,
1868         .ndo_stop               =       gve_close,
1869         .ndo_get_stats64        =       gve_get_stats,
1870         .ndo_tx_timeout         =       gve_tx_timeout,
1871         .ndo_set_features       =       gve_set_features,
1872         .ndo_bpf                =       gve_xdp,
1873         .ndo_xdp_xmit           =       gve_xdp_xmit,
1874         .ndo_xsk_wakeup         =       gve_xsk_wakeup,
1875 };
1876
1877 static void gve_handle_status(struct gve_priv *priv, u32 status)
1878 {
1879         if (GVE_DEVICE_STATUS_RESET_MASK & status) {
1880                 dev_info(&priv->pdev->dev, "Device requested reset.\n");
1881                 gve_set_do_reset(priv);
1882         }
1883         if (GVE_DEVICE_STATUS_REPORT_STATS_MASK & status) {
1884                 priv->stats_report_trigger_cnt++;
1885                 gve_set_do_report_stats(priv);
1886         }
1887 }
1888
1889 static void gve_handle_reset(struct gve_priv *priv)
1890 {
1891         /* A service task will be scheduled at the end of probe to catch any
1892          * resets that need to happen, and we don't want to reset until
1893          * probe is done.
1894          */
1895         if (gve_get_probe_in_progress(priv))
1896                 return;
1897
1898         if (gve_get_do_reset(priv)) {
1899                 rtnl_lock();
1900                 gve_reset(priv, false);
1901                 rtnl_unlock();
1902         }
1903 }
1904
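     /* Fill the DMA'd stats report with per-queue TX and RX counters after
      * bumping the report's written_count sequence number.
      */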
1905 void gve_handle_report_stats(struct gve_priv *priv)
1906 {
1907         struct stats *stats = priv->stats_report->stats;
1908         int idx, stats_idx = 0;
1909         unsigned int start = 0;
1910         u64 tx_bytes;
1911
1912         if (!gve_get_report_stats(priv))
1913                 return;
1914
1915         be64_add_cpu(&priv->stats_report->written_count, 1);
1916         /* tx stats */
1917         if (priv->tx) {
1918                 for (idx = 0; idx < gve_num_tx_queues(priv); idx++) {
1919                         u32 last_completion = 0;
1920                         u32 tx_frames = 0;
1921
1922                         /* DQO doesn't currently support these metrics. */
1923                         if (gve_is_gqi(priv)) {
1924                                 last_completion = priv->tx[idx].done;
1925                                 tx_frames = priv->tx[idx].req;
1926                         }
1927
1928                         do {
1929                                 start = u64_stats_fetch_begin(&priv->tx[idx].statss);
1930                                 tx_bytes = priv->tx[idx].bytes_done;
1931                         } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start));
1932                         stats[stats_idx++] = (struct stats) {
1933                                 .stat_name = cpu_to_be32(TX_WAKE_CNT),
1934                                 .value = cpu_to_be64(priv->tx[idx].wake_queue),
1935                                 .queue_id = cpu_to_be32(idx),
1936                         };
1937                         stats[stats_idx++] = (struct stats) {
1938                                 .stat_name = cpu_to_be32(TX_STOP_CNT),
1939                                 .value = cpu_to_be64(priv->tx[idx].stop_queue),
1940                                 .queue_id = cpu_to_be32(idx),
1941                         };
1942                         stats[stats_idx++] = (struct stats) {
1943                                 .stat_name = cpu_to_be32(TX_FRAMES_SENT),
1944                                 .value = cpu_to_be64(tx_frames),
1945                                 .queue_id = cpu_to_be32(idx),
1946                         };
1947                         stats[stats_idx++] = (struct stats) {
1948                                 .stat_name = cpu_to_be32(TX_BYTES_SENT),
1949                                 .value = cpu_to_be64(tx_bytes),
1950                                 .queue_id = cpu_to_be32(idx),
1951                         };
1952                         stats[stats_idx++] = (struct stats) {
1953                                 .stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
1954                                 .value = cpu_to_be64(last_completion),
1955                                 .queue_id = cpu_to_be32(idx),
1956                         };
1957                         stats[stats_idx++] = (struct stats) {
1958                                 .stat_name = cpu_to_be32(TX_TIMEOUT_CNT),
1959                                 .value = cpu_to_be64(priv->tx[idx].queue_timeout),
1960                                 .queue_id = cpu_to_be32(idx),
1961                         };
1962                 }
1963         }
1964         /* rx stats */
1965         if (priv->rx) {
1966                 for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
1967                         stats[stats_idx++] = (struct stats) {
1968                                 .stat_name = cpu_to_be32(RX_NEXT_EXPECTED_SEQUENCE),
1969                                 .value = cpu_to_be64(priv->rx[idx].desc.seqno),
1970                                 .queue_id = cpu_to_be32(idx),
1971                         };
1972                         stats[stats_idx++] = (struct stats) {
1973                                 .stat_name = cpu_to_be32(RX_BUFFERS_POSTED),
1974                                 .value = cpu_to_be64(priv->rx[idx].fill_cnt),
1975                                 .queue_id = cpu_to_be32(idx),
1976                         };
1977                 }
1978         }
1979 }
1980
1981 /* Handle NIC status register changes, reset requests and report stats */
1982 static void gve_service_task(struct work_struct *work)
1983 {
1984         struct gve_priv *priv = container_of(work, struct gve_priv,
1985                                              service_task);
1986         u32 status = ioread32be(&priv->reg_bar0->device_status);
1987
1988         gve_handle_status(priv, status);
1989
1990         gve_handle_reset(priv);
1991         gve_handle_link_status(priv, GVE_DEVICE_STATUS_LINK_STATUS_MASK & status);
1992 }
1993
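     /* XDP features (basic, redirect, ndo_xmit and XSK zero-copy) are only
      * advertised in the GQI-QPL queue format.
      */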
1994 static void gve_set_netdev_xdp_features(struct gve_priv *priv)
1995 {
1996         if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
1997                 priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
1998                 priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
1999                 priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
2000                 priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
2001         } else {
2002                 priv->dev->xdp_features = 0;
2003         }
2004 }
2005
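     /* One-time (or post-reset) initialization: set up the admin queue,
      * verify driver compatibility, optionally describe the device to size
      * the MTU, MSI-X vectors and queue counts, then set up device
      * resources. The admin queue is freed on failure.
      */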
2006 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
2007 {
2008         int num_ntfy;
2009         int err;
2010
2011         /* Set up the adminq */
2012         err = gve_adminq_alloc(&priv->pdev->dev, priv);
2013         if (err) {
2014                 dev_err(&priv->pdev->dev,
2015                         "Failed to alloc admin queue: err=%d\n", err);
2016                 return err;
2017         }
2018
2019         err = gve_verify_driver_compatibility(priv);
2020         if (err) {
2021                 dev_err(&priv->pdev->dev,
2022                         "Could not verify driver compatibility: err=%d\n", err);
2023                 goto err;
2024         }
2025
2026         if (skip_describe_device)
2027                 goto setup_device;
2028
2029         priv->queue_format = GVE_QUEUE_FORMAT_UNSPECIFIED;
2030         /* Get the initial information we need from the device */
2031         err = gve_adminq_describe_device(priv);
2032         if (err) {
2033                 dev_err(&priv->pdev->dev,
2034                         "Could not get device information: err=%d\n", err);
2035                 goto err;
2036         }
2037         priv->dev->mtu = priv->dev->max_mtu;
2038         num_ntfy = pci_msix_vec_count(priv->pdev);
2039         if (num_ntfy <= 0) {
2040                 dev_err(&priv->pdev->dev,
2041                         "could not count MSI-x vectors: err=%d\n", num_ntfy);
2042                 err = num_ntfy;
2043                 goto err;
2044         } else if (num_ntfy < GVE_MIN_MSIX) {
2045                 dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
2046                         GVE_MIN_MSIX, num_ntfy);
2047                 err = -EINVAL;
2048                 goto err;
2049         }
2050
2051         /* Big TCP is only supported on DQ */
2052         if (!gve_is_gqi(priv))
2053                 netif_set_tso_max_size(priv->dev, DQO_TX_MAX);
2054
2055         priv->num_registered_pages = 0;
2056         priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
2057         /* gvnic has one Notification Block per MSI-x vector, except for the
2058          * management vector
2059          */
2060         priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
2061         priv->mgmt_msix_idx = priv->num_ntfy_blks;
2062
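             /* Notification blocks are split evenly between TX and RX queues. */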
2063         priv->tx_cfg.max_queues =
2064                 min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
2065         priv->rx_cfg.max_queues =
2066                 min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
2067
2068         priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
2069         priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
2070         if (priv->default_num_queues > 0) {
2071                 priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
2072                                                 priv->tx_cfg.num_queues);
2073                 priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
2074                                                 priv->rx_cfg.num_queues);
2075         }
2076
2077         dev_info(&priv->pdev->dev, "TX queues %d, RX queues %d\n",
2078                  priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
2079         dev_info(&priv->pdev->dev, "Max TX queues %d, Max RX queues %d\n",
2080                  priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
2081
2082         if (!gve_is_gqi(priv)) {
2083                 priv->tx_coalesce_usecs = GVE_TX_IRQ_RATELIMIT_US_DQO;
2084                 priv->rx_coalesce_usecs = GVE_RX_IRQ_RATELIMIT_US_DQO;
2085         }
2086
2087 setup_device:
2088         gve_set_netdev_xdp_features(priv);
2089         err = gve_setup_device_resources(priv);
2090         if (!err)
2091                 return 0;
2092 err:
2093         gve_adminq_free(&priv->pdev->dev, priv);
2094         return err;
2095 }
2096
2097 static void gve_teardown_priv_resources(struct gve_priv *priv)
2098 {
2099         gve_teardown_device_resources(priv);
2100         gve_adminq_free(&priv->pdev->dev, priv);
2101 }
2102
2103 static void gve_trigger_reset(struct gve_priv *priv)
2104 {
2105         /* Reset the device by releasing the AQ */
2106         gve_adminq_release(priv);
2107 }
2108
2109 static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
2110 {
2111         gve_trigger_reset(priv);
2112         /* With the reset having already happened, close cannot fail */
2113         if (was_up)
2114                 gve_close(priv->dev);
2115         gve_teardown_priv_resources(priv);
2116 }
2117
2118 static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
2119 {
2120         int err;
2121
2122         err = gve_init_priv(priv, true);
2123         if (err)
2124                 goto err;
2125         if (was_up) {
2126                 err = gve_open(priv->dev);
2127                 if (err)
2128                         goto err;
2129         }
2130         return 0;
2131 err:
2132         dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
2133         gve_turndown(priv);
2134         return err;
2135 }
2136
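     /* Reset the device: optionally attempt a clean close first, tear
      * everything down, then rebuild and reopen via gve_reset_recovery()
      * and reset the interface counters.
      */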
2137 int gve_reset(struct gve_priv *priv, bool attempt_teardown)
2138 {
2139         bool was_up = netif_carrier_ok(priv->dev);
2140         int err;
2141
2142         dev_info(&priv->pdev->dev, "Performing reset\n");
2143         gve_clear_do_reset(priv);
2144         gve_set_reset_in_progress(priv);
2145         /* If we aren't attempting to teardown normally, just go turndown and
2146          * reset right away.
2147          */
2148         if (!attempt_teardown) {
2149                 gve_turndown(priv);
2150                 gve_reset_and_teardown(priv, was_up);
2151         } else {
2152                 /* Otherwise attempt to close normally */
2153                 if (was_up) {
2154                         err = gve_close(priv->dev);
2155                         /* If that fails reset as we did above */
2156                         if (err)
2157                                 gve_reset_and_teardown(priv, was_up);
2158                 }
2159                 /* Clean up any remaining resources */
2160                 gve_teardown_priv_resources(priv);
2161         }
2162
2163         /* Set it all back up */
2164         err = gve_reset_recovery(priv, was_up);
2165         gve_clear_reset_in_progress(priv);
2166         priv->reset_cnt++;
2167         priv->interface_up_cnt = 0;
2168         priv->interface_down_cnt = 0;
2169         priv->stats_report_trigger_cnt = 0;
2170         return err;
2171 }
2172
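     /* Write the driver version string to the device a byte at a time; with
      * the current constants this is "GVE-1.0.0" followed by a newline.
      */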
2173 static void gve_write_version(u8 __iomem *driver_version_register)
2174 {
2175         const char *c = gve_version_prefix;
2176
2177         while (*c) {
2178                 writeb(*c, driver_version_register);
2179                 c++;
2180         }
2181
2182         c = gve_version_str;
2183         while (*c) {
2184                 writeb(*c, driver_version_register);
2185                 c++;
2186         }
2187         writeb('\n', driver_version_register);
2188 }
2189
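     /* PCI probe: map the register and doorbell BARs, read the maximum queue
      * counts, allocate the netdev and service workqueue, initialize priv
      * state via gve_init_priv(), and register the netdev. Errors unwind in
      * reverse order.
      */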
2190 static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2191 {
2192         int max_tx_queues, max_rx_queues;
2193         struct net_device *dev;
2194         __be32 __iomem *db_bar;
2195         struct gve_registers __iomem *reg_bar;
2196         struct gve_priv *priv;
2197         int err;
2198
2199         err = pci_enable_device(pdev);
2200         if (err)
2201                 return err;
2202
2203         err = pci_request_regions(pdev, "gvnic-cfg");
2204         if (err)
2205                 goto abort_with_enabled;
2206
2207         pci_set_master(pdev);
2208
2209         err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2210         if (err) {
2211                 dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
2212                 goto abort_with_pci_region;
2213         }
2214
2215         reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
2216         if (!reg_bar) {
2217                 dev_err(&pdev->dev, "Failed to map pci bar!\n");
2218                 err = -ENOMEM;
2219                 goto abort_with_pci_region;
2220         }
2221
2222         db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
2223         if (!db_bar) {
2224                 dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
2225                 err = -ENOMEM;
2226                 goto abort_with_reg_bar;
2227         }
2228
2229         gve_write_version(&reg_bar->driver_version);
2230         /* Get max queues to alloc etherdev */
2231         max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
2232         max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
2233         /* Alloc and setup the netdev and priv */
2234         dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
2235         if (!dev) {
2236                 dev_err(&pdev->dev, "could not allocate netdev\n");
2237                 err = -ENOMEM;
2238                 goto abort_with_db_bar;
2239         }
2240         SET_NETDEV_DEV(dev, &pdev->dev);
2241         pci_set_drvdata(pdev, dev);
2242         dev->ethtool_ops = &gve_ethtool_ops;
2243         dev->netdev_ops = &gve_netdev_ops;
2244
2245         /* Set default and supported features.
2246          *
2247          * Features might be set in other locations as well (such as
2248          * `gve_adminq_describe_device`).
2249          */
2250         dev->hw_features = NETIF_F_HIGHDMA;
2251         dev->hw_features |= NETIF_F_SG;
2252         dev->hw_features |= NETIF_F_HW_CSUM;
2253         dev->hw_features |= NETIF_F_TSO;
2254         dev->hw_features |= NETIF_F_TSO6;
2255         dev->hw_features |= NETIF_F_TSO_ECN;
2256         dev->hw_features |= NETIF_F_RXCSUM;
2257         dev->hw_features |= NETIF_F_RXHASH;
2258         dev->features = dev->hw_features;
2259         dev->watchdog_timeo = 5 * HZ;
2260         dev->min_mtu = ETH_MIN_MTU;
2261         netif_carrier_off(dev);
2262
2263         priv = netdev_priv(dev);
2264         priv->dev = dev;
2265         priv->pdev = pdev;
2266         priv->msg_enable = DEFAULT_MSG_LEVEL;
2267         priv->reg_bar0 = reg_bar;
2268         priv->db_bar2 = db_bar;
2269         priv->service_task_flags = 0x0;
2270         priv->state_flags = 0x0;
2271         priv->ethtool_flags = 0x0;
2272
2273         gve_set_probe_in_progress(priv);
2274         priv->gve_wq = alloc_ordered_workqueue("gve", 0);
2275         if (!priv->gve_wq) {
2276                 dev_err(&pdev->dev, "Could not allocate workqueue");
2277                 err = -ENOMEM;
2278                 goto abort_with_netdev;
2279         }
2280         INIT_WORK(&priv->service_task, gve_service_task);
2281         INIT_WORK(&priv->stats_report_task, gve_stats_report_task);
2282         priv->tx_cfg.max_queues = max_tx_queues;
2283         priv->rx_cfg.max_queues = max_rx_queues;
2284
2285         err = gve_init_priv(priv, false);
2286         if (err)
2287                 goto abort_with_wq;
2288
2289         err = register_netdev(dev);
2290         if (err)
2291                 goto abort_with_gve_init;
2292
2293         dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
2294         dev_info(&pdev->dev, "GVE queue format %d\n", (int)priv->queue_format);
2295         gve_clear_probe_in_progress(priv);
2296         queue_work(priv->gve_wq, &priv->service_task);
2297         return 0;
2298
2299 abort_with_gve_init:
2300         gve_teardown_priv_resources(priv);
2301
2302 abort_with_wq:
2303         destroy_workqueue(priv->gve_wq);
2304
2305 abort_with_netdev:
2306         free_netdev(dev);
2307
2308 abort_with_db_bar:
2309         pci_iounmap(pdev, db_bar);
2310
2311 abort_with_reg_bar:
2312         pci_iounmap(pdev, reg_bar);
2313
2314 abort_with_pci_region:
2315         pci_release_regions(pdev);
2316
2317 abort_with_enabled:
2318         pci_disable_device(pdev);
2319         return err;
2320 }
2321
2322 static void gve_remove(struct pci_dev *pdev)
2323 {
2324         struct net_device *netdev = pci_get_drvdata(pdev);
2325         struct gve_priv *priv = netdev_priv(netdev);
2326         __be32 __iomem *db_bar = priv->db_bar2;
2327         void __iomem *reg_bar = priv->reg_bar0;
2328
2329         unregister_netdev(netdev);
2330         gve_teardown_priv_resources(priv);
2331         destroy_workqueue(priv->gve_wq);
2332         free_netdev(netdev);
2333         pci_iounmap(pdev, db_bar);
2334         pci_iounmap(pdev, reg_bar);
2335         pci_release_regions(pdev);
2336         pci_disable_device(pdev);
2337 }
2338
2339 static void gve_shutdown(struct pci_dev *pdev)
2340 {
2341         struct net_device *netdev = pci_get_drvdata(pdev);
2342         struct gve_priv *priv = netdev_priv(netdev);
2343         bool was_up = netif_carrier_ok(priv->dev);
2344
2345         rtnl_lock();
2346         if (was_up && gve_close(priv->dev)) {
2347                 /* If the dev was up, attempt to close; if close fails, reset */
2348                 gve_reset_and_teardown(priv, was_up);
2349         } else {
2350                 /* If the dev wasn't up or close worked, finish tearing down */
2351                 gve_teardown_priv_resources(priv);
2352         }
2353         rtnl_unlock();
2354 }
2355
2356 #ifdef CONFIG_PM
2357 static int gve_suspend(struct pci_dev *pdev, pm_message_t state)
2358 {
2359         struct net_device *netdev = pci_get_drvdata(pdev);
2360         struct gve_priv *priv = netdev_priv(netdev);
2361         bool was_up = netif_carrier_ok(priv->dev);
2362
2363         priv->suspend_cnt++;
2364         rtnl_lock();
2365         if (was_up && gve_close(priv->dev)) {
2366                 /* If the dev was up, attempt to close; if close fails, reset */
2367                 gve_reset_and_teardown(priv, was_up);
2368         } else {
2369                 /* If the dev wasn't up or close worked, finish tearing down */
2370                 gve_teardown_priv_resources(priv);
2371         }
2372         priv->up_before_suspend = was_up;
2373         rtnl_unlock();
2374         return 0;
2375 }
2376
2377 static int gve_resume(struct pci_dev *pdev)
2378 {
2379         struct net_device *netdev = pci_get_drvdata(pdev);
2380         struct gve_priv *priv = netdev_priv(netdev);
2381         int err;
2382
2383         priv->resume_cnt++;
2384         rtnl_lock();
2385         err = gve_reset_recovery(priv, priv->up_before_suspend);
2386         rtnl_unlock();
2387         return err;
2388 }
2389 #endif /* CONFIG_PM */
2390
2391 static const struct pci_device_id gve_id_table[] = {
2392         { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
2393         { }
2394 };
2395
2396 static struct pci_driver gvnic_driver = {
2397         .name           = "gvnic",
2398         .id_table       = gve_id_table,
2399         .probe          = gve_probe,
2400         .remove         = gve_remove,
2401         .shutdown       = gve_shutdown,
2402 #ifdef CONFIG_PM
2403         .suspend        = gve_suspend,
2404         .resume         = gve_resume,
2405 #endif
2406 };
2407
2408 module_pci_driver(gvnic_driver);
2409
2410 MODULE_DEVICE_TABLE(pci, gve_id_table);
2411 MODULE_AUTHOR("Google, Inc.");
2412 MODULE_DESCRIPTION("gVNIC Driver");
2413 MODULE_LICENSE("Dual MIT/GPL");
2414 MODULE_VERSION(GVE_VERSION);