drivers/net/ethernet/amazon/ena/ena_netdev.c
1 /*
2  * Copyright 2015 Amazon.com, Inc. or its affiliates.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35 #ifdef CONFIG_RFS_ACCEL
36 #include <linux/cpu_rmap.h>
37 #endif /* CONFIG_RFS_ACCEL */
38 #include <linux/ethtool.h>
39 #include <linux/kernel.h>
40 #include <linux/module.h>
41 #include <linux/numa.h>
42 #include <linux/pci.h>
43 #include <linux/utsname.h>
44 #include <linux/version.h>
45 #include <linux/vmalloc.h>
46 #include <net/ip.h>
47
48 #include "ena_netdev.h"
49 #include <linux/bpf_trace.h>
50 #include "ena_pci_id_tbl.h"
51
52 MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
53 MODULE_DESCRIPTION(DEVICE_NAME);
54 MODULE_LICENSE("GPL");
55
56 /* Time in jiffies before concluding the transmitter is hung. */
57 #define TX_TIMEOUT  (5 * HZ)
58
59 #define ENA_NAPI_BUDGET 64
60
61 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
62                 NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
63 static int debug = -1;
64 module_param(debug, int, 0);
65 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
66
67 static struct ena_aenq_handlers aenq_handlers;
68
69 static struct workqueue_struct *ena_wq;
70
71 MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
72
73 static int ena_rss_init_default(struct ena_adapter *adapter);
74 static void check_for_admin_com_state(struct ena_adapter *adapter);
75 static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
76 static int ena_restore_device(struct ena_adapter *adapter);
77
78 static void ena_init_io_rings(struct ena_adapter *adapter,
79                               int first_index, int count);
80 static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
81                                    int count);
82 static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
83                                   int count);
84 static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
85 static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
86                                            int first_index,
87                                            int count);
88 static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
89 static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
90 static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
91 static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
92 static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
93 static void ena_napi_disable_in_range(struct ena_adapter *adapter,
94                                       int first_index, int count);
95 static void ena_napi_enable_in_range(struct ena_adapter *adapter,
96                                      int first_index, int count);
97 static int ena_up(struct ena_adapter *adapter);
98 static void ena_down(struct ena_adapter *adapter);
99 static void ena_unmask_interrupt(struct ena_ring *tx_ring,
100                                  struct ena_ring *rx_ring);
101 static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
102                                       struct ena_ring *rx_ring);
103 static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
104                               struct ena_tx_buffer *tx_info);
105 static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
106                                             int first_index, int count);
107
108 static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
109 {
110         struct ena_adapter *adapter = netdev_priv(dev);
111
112         /* Change the state of the device to trigger reset
113          * Check that we are not in the middle of a previously triggered reset
114          */
115
116         if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
117                 return;
118
119         adapter->reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
120         u64_stats_update_begin(&adapter->syncp);
121         adapter->dev_stats.tx_timeout++;
122         u64_stats_update_end(&adapter->syncp);
123
124         netif_err(adapter, tx_err, dev, "Transmit time out\n");
125 }
126
127 static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
128 {
129         int i;
130
131         for (i = 0; i < adapter->num_io_queues; i++)
132                 adapter->rx_ring[i].mtu = mtu;
133 }
134
135 static int ena_change_mtu(struct net_device *dev, int new_mtu)
136 {
137         struct ena_adapter *adapter = netdev_priv(dev);
138         int ret;
139
140         ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
141         if (!ret) {
142                 netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
143                 update_rx_ring_mtu(adapter, new_mtu);
144                 dev->mtu = new_mtu;
145         } else {
146                 netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
147                           new_mtu);
148         }
149
150         return ret;
151 }
152
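/* Common TX submission helper: ring the doorbell if the LLQ max burst
 * size was reached, hand the descriptors to the device via
 * ena_com_prepare_tx(), update the TX stats and advance next_to_use.
 * Any failure other than -ENOMEM schedules a device reset.
 */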
153 static int ena_xmit_common(struct net_device *dev,
154                            struct ena_ring *ring,
155                            struct ena_tx_buffer *tx_info,
156                            struct ena_com_tx_ctx *ena_tx_ctx,
157                            u16 next_to_use,
158                            u32 bytes)
159 {
160         struct ena_adapter *adapter = netdev_priv(dev);
161         int rc, nb_hw_desc;
162
163         if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
164                                                 ena_tx_ctx))) {
165                 netif_dbg(adapter, tx_queued, dev,
166                           "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
167                           ring->qid);
168                 ena_com_write_sq_doorbell(ring->ena_com_io_sq);
169         }
170
171         /* prepare the packet's descriptors for the dma engine */
172         rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
173                                 &nb_hw_desc);
174
175         /* In case there isn't enough space in the queue for the packet,
176          * we simply drop it. All other failure reasons of
177          * ena_com_prepare_tx() are fatal and therefore require a device reset.
178          */
179         if (unlikely(rc)) {
180                 netif_err(adapter, tx_queued, dev,
181                           "failed to prepare tx bufs\n");
182                 u64_stats_update_begin(&ring->syncp);
183                 ring->tx_stats.prepare_ctx_err++;
184                 u64_stats_update_end(&ring->syncp);
185                 if (rc != -ENOMEM) {
186                         adapter->reset_reason =
187                                 ENA_REGS_RESET_DRIVER_INVALID_STATE;
188                         set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
189                 }
190                 return rc;
191         }
192
193         u64_stats_update_begin(&ring->syncp);
194         ring->tx_stats.cnt++;
195         ring->tx_stats.bytes += bytes;
196         u64_stats_update_end(&ring->syncp);
197
198         tx_info->tx_descs = nb_hw_desc;
199         tx_info->last_jiffies = jiffies;
200         tx_info->print_once = 0;
201
202         ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
203                                                  ring->ring_size);
204         return 0;
205 }
206
207 /* This is the XDP napi callback. XDP queues use a napi callback that is
208  * separate from the one used by the Rx/Tx queues.
209  */
210 static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
211 {
212         struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
213         u32 xdp_work_done, xdp_budget;
214         struct ena_ring *xdp_ring;
215         int napi_comp_call = 0;
216         int ret;
217
218         xdp_ring = ena_napi->xdp_ring;
219         xdp_ring->first_interrupt = ena_napi->first_interrupt;
220
221         xdp_budget = budget;
222
223         if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
224             test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
225                 napi_complete_done(napi, 0);
226                 return 0;
227         }
228
229         xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
230
231         /* If the device is about to reset or is down, avoid unmasking
232          * the interrupt and return 0 so NAPI won't reschedule
233          */
234         if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
235                 napi_complete_done(napi, 0);
236                 ret = 0;
237         } else if (xdp_budget > xdp_work_done) {
238                 napi_comp_call = 1;
239                 if (napi_complete_done(napi, xdp_work_done))
240                         ena_unmask_interrupt(xdp_ring, NULL);
241                 ena_update_ring_numa_node(xdp_ring, NULL);
242                 ret = xdp_work_done;
243         } else {
244                 ret = xdp_budget;
245         }
246
247         u64_stats_update_begin(&xdp_ring->syncp);
248         xdp_ring->tx_stats.napi_comp += napi_comp_call;
249         xdp_ring->tx_stats.tx_poll++;
250         u64_stats_update_end(&xdp_ring->syncp);
251
252         return ret;
253 }
254
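/* Map an XDP frame for transmission: the start of the frame (up to
 * tx_max_header_size bytes) is passed as an LLQ push header, and any
 * remaining payload is DMA-mapped as a single buffer.
 */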
255 static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring,
256                                struct ena_tx_buffer *tx_info,
257                                struct xdp_buff *xdp,
258                                void **push_hdr,
259                                u32 *push_len)
260 {
261         struct ena_adapter *adapter = xdp_ring->adapter;
262         struct ena_com_buf *ena_buf;
263         dma_addr_t dma = 0;
264         u32 size;
265
266         tx_info->xdpf = xdp_convert_buff_to_frame(xdp);
267         size = tx_info->xdpf->len;
268         ena_buf = tx_info->bufs;
269
270         /* llq push buffer */
271         *push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
272         *push_hdr = tx_info->xdpf->data;
273
274         if (size - *push_len > 0) {
275                 dma = dma_map_single(xdp_ring->dev,
276                                      *push_hdr + *push_len,
277                                      size - *push_len,
278                                      DMA_TO_DEVICE);
279                 if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
280                         goto error_report_dma_error;
281
282                 tx_info->map_linear_data = 1;
283                 tx_info->num_of_bufs = 1;
284         }
285
286         ena_buf->paddr = dma;
287         ena_buf->len = size;
288
289         return 0;
290
291 error_report_dma_error:
292         u64_stats_update_begin(&xdp_ring->syncp);
293         xdp_ring->tx_stats.dma_mapping_err++;
294         u64_stats_update_end(&xdp_ring->syncp);
295         netdev_warn(adapter->netdev, "failed to map xdp buff\n");
296
297         xdp_return_frame_rx_napi(tx_info->xdpf);
298         tx_info->xdpf = NULL;
299         tx_info->num_of_bufs = 0;
300
301         return -EINVAL;
302 }
303
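/* Transmit an XDP_TX frame on the XDP TX ring selected by @qid: take a
 * reference on the RX page backing the frame, map the buffer, fill the
 * TX context, submit it through ena_xmit_common() and write the doorbell.
 */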
304 static int ena_xdp_xmit_buff(struct net_device *dev,
305                              struct xdp_buff *xdp,
306                              int qid,
307                              struct ena_rx_buffer *rx_info)
308 {
309         struct ena_adapter *adapter = netdev_priv(dev);
310         struct ena_com_tx_ctx ena_tx_ctx = {};
311         struct ena_tx_buffer *tx_info;
312         struct ena_ring *xdp_ring;
313         u16 next_to_use, req_id;
314         int rc;
315         void *push_hdr;
316         u32 push_len;
317
318         xdp_ring = &adapter->tx_ring[qid];
319         next_to_use = xdp_ring->next_to_use;
320         req_id = xdp_ring->free_ids[next_to_use];
321         tx_info = &xdp_ring->tx_buffer_info[req_id];
322         tx_info->num_of_bufs = 0;
323         page_ref_inc(rx_info->page);
324         tx_info->xdp_rx_page = rx_info->page;
325
326         rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len);
327         if (unlikely(rc))
328                 goto error_drop_packet;
329
330         ena_tx_ctx.ena_bufs = tx_info->bufs;
331         ena_tx_ctx.push_header = push_hdr;
332         ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
333         ena_tx_ctx.req_id = req_id;
334         ena_tx_ctx.header_len = push_len;
335
336         rc = ena_xmit_common(dev,
337                              xdp_ring,
338                              tx_info,
339                              &ena_tx_ctx,
340                              next_to_use,
341                              xdp->data_end - xdp->data);
342         if (rc)
343                 goto error_unmap_dma;
344         /* trigger the dma engine. ena_com_write_sq_doorbell()
345          * has a memory barrier
346          */
347         ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
348         u64_stats_update_begin(&xdp_ring->syncp);
349         xdp_ring->tx_stats.doorbells++;
350         u64_stats_update_end(&xdp_ring->syncp);
351
352         return NETDEV_TX_OK;
353
354 error_unmap_dma:
355         ena_unmap_tx_buff(xdp_ring, tx_info);
356         tx_info->xdpf = NULL;
357 error_drop_packet:
358         __free_page(tx_info->xdp_rx_page);
359         return NETDEV_TX_OK;
360 }
361
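/* Run the attached XDP program on a received buffer and act on the
 * verdict: XDP_TX frames are sent out on the paired XDP TX queue,
 * XDP_ABORTED and unknown verdicts are reported, and the verdict is
 * returned to the caller.
 */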
362 static int ena_xdp_execute(struct ena_ring *rx_ring,
363                            struct xdp_buff *xdp,
364                            struct ena_rx_buffer *rx_info)
365 {
366         struct bpf_prog *xdp_prog;
367         u32 verdict = XDP_PASS;
368
369         rcu_read_lock();
370         xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
371
372         if (!xdp_prog)
373                 goto out;
374
375         verdict = bpf_prog_run_xdp(xdp_prog, xdp);
376
377         if (verdict == XDP_TX)
378                 ena_xdp_xmit_buff(rx_ring->netdev,
379                                   xdp,
380                                   rx_ring->qid + rx_ring->adapter->num_io_queues,
381                                   rx_info);
382         else if (unlikely(verdict == XDP_ABORTED))
383                 trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
384         else if (unlikely(verdict > XDP_TX))
385                 bpf_warn_invalid_xdp_action(verdict);
386 out:
387         rcu_read_unlock();
388         return verdict;
389 }
390
391 static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
392 {
393         adapter->xdp_first_ring = adapter->num_io_queues;
394         adapter->xdp_num_queues = adapter->num_io_queues;
395
396         ena_init_io_rings(adapter,
397                           adapter->xdp_first_ring,
398                           adapter->xdp_num_queues);
399 }
400
401 static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
402 {
403         int rc = 0;
404
405         rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring,
406                                              adapter->xdp_num_queues);
407         if (rc)
408                 goto setup_err;
409
410         rc = ena_create_io_tx_queues_in_range(adapter,
411                                               adapter->xdp_first_ring,
412                                               adapter->xdp_num_queues);
413         if (rc)
414                 goto create_err;
415
416         return 0;
417
418 create_err:
419         ena_free_all_io_tx_resources(adapter);
420 setup_err:
421         return rc;
422 }
423
424 /* Provides a way for both the kernel and the bpf program to learn
425  * more about the RX queue a given XDP frame arrived on.
426  */
427 static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
428 {
429         int rc;
430
431         rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid);
432
433         if (rc) {
434                 netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
435                           "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
436                           rx_ring->qid, rc);
437                 goto err;
438         }
439
440         rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
441                                         NULL);
442
443         if (rc) {
444                 netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
445                           "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
446                           rx_ring->qid, rc);
447                 xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
448         }
449
450 err:
451         return rc;
452 }
453
454 static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
455 {
456         xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
457         xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
458 }
459
460 static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
461                                                  struct bpf_prog *prog,
462                                                  int first, int count)
463 {
464         struct ena_ring *rx_ring;
465         int i = 0;
466
467         for (i = first; i < count; i++) {
468                 rx_ring = &adapter->rx_ring[i];
469                 xchg(&rx_ring->xdp_bpf_prog, prog);
470                 if (prog) {
471                         ena_xdp_register_rxq_info(rx_ring);
472                         rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
473                 } else {
474                         ena_xdp_unregister_rxq_info(rx_ring);
475                         rx_ring->rx_headroom = 0;
476                 }
477         }
478 }
479
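/* Install @prog as the adapter-wide XDP program, propagate it to all RX
 * rings (adjusting their rx headroom accordingly) and release the
 * reference to the previously installed program, if any.
 */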
480 static void ena_xdp_exchange_program(struct ena_adapter *adapter,
481                                      struct bpf_prog *prog)
482 {
483         struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
484
485         ena_xdp_exchange_program_rx_in_range(adapter,
486                                              prog,
487                                              0,
488                                              adapter->num_io_queues);
489
490         if (old_bpf_prog)
491                 bpf_prog_put(old_bpf_prog);
492 }
493
494 static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
495 {
496         bool was_up;
497         int rc;
498
499         was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
500
501         if (was_up)
502                 ena_down(adapter);
503
504         adapter->xdp_first_ring = 0;
505         adapter->xdp_num_queues = 0;
506         ena_xdp_exchange_program(adapter, NULL);
507         if (was_up) {
508                 rc = ena_up(adapter);
509                 if (rc)
510                         return rc;
511         }
512         return 0;
513 }
514
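/* Attach or detach an XDP program. Attaching on a running interface
 * brings it down, initializes the XDP TX queues and brings it back up;
 * detaching tears the XDP queues down again. While a program is loaded,
 * netdev->max_mtu is limited to ENA_XDP_MAX_MTU.
 */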
515 static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
516 {
517         struct ena_adapter *adapter = netdev_priv(netdev);
518         struct bpf_prog *prog = bpf->prog;
519         struct bpf_prog *old_bpf_prog;
520         int rc, prev_mtu;
521         bool is_up;
522
523         is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
524         rc = ena_xdp_allowed(adapter);
525         if (rc == ENA_XDP_ALLOWED) {
526                 old_bpf_prog = adapter->xdp_bpf_prog;
527                 if (prog) {
528                         if (!is_up) {
529                                 ena_init_all_xdp_queues(adapter);
530                         } else if (!old_bpf_prog) {
531                                 ena_down(adapter);
532                                 ena_init_all_xdp_queues(adapter);
533                         }
534                         ena_xdp_exchange_program(adapter, prog);
535
536                         if (is_up && !old_bpf_prog) {
537                                 rc = ena_up(adapter);
538                                 if (rc)
539                                         return rc;
540                         }
541                 } else if (old_bpf_prog) {
542                         rc = ena_destroy_and_free_all_xdp_queues(adapter);
543                         if (rc)
544                                 return rc;
545                 }
546
547                 prev_mtu = netdev->max_mtu;
548                 netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
549
550                 if (!old_bpf_prog)
551                         netif_info(adapter, drv, adapter->netdev,
552                                    "xdp program set, changing the max_mtu from %d to %d",
553                                    prev_mtu, netdev->max_mtu);
554
555         } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
556                 netif_err(adapter, drv, adapter->netdev,
557                           "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
558                           netdev->mtu, ENA_XDP_MAX_MTU);
559                 NL_SET_ERR_MSG_MOD(bpf->extack,
560                                    "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
561                 return -EINVAL;
562         } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
563                 netif_err(adapter, drv, adapter->netdev,
564                           "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
565                           adapter->num_io_queues, adapter->max_num_io_queues);
566                 NL_SET_ERR_MSG_MOD(bpf->extack,
567                                    "Failed to set xdp program, there is not enough space for allocating XDP queues. Check dmesg for more info");
568                 return -EINVAL;
569         }
570
571         return 0;
572 }
573
574 /* This is the main xdp callback, it's used by the kernel to set/unset the xdp
575  * program as well as to query the current xdp program id.
576  */
577 static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
578 {
579         struct ena_adapter *adapter = netdev_priv(netdev);
580
581         switch (bpf->command) {
582         case XDP_SETUP_PROG:
583                 return ena_xdp_set(netdev, bpf);
584         case XDP_QUERY_PROG:
585                 bpf->prog_id = adapter->xdp_bpf_prog ?
586                         adapter->xdp_bpf_prog->aux->id : 0;
587                 break;
588         default:
589                 return -EINVAL;
590         }
591         return 0;
592 }
593
594 static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
595 {
596 #ifdef CONFIG_RFS_ACCEL
597         u32 i;
598         int rc;
599
600         adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_io_queues);
601         if (!adapter->netdev->rx_cpu_rmap)
602                 return -ENOMEM;
603         for (i = 0; i < adapter->num_io_queues; i++) {
604                 int irq_idx = ENA_IO_IRQ_IDX(i);
605
606                 rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
607                                       pci_irq_vector(adapter->pdev, irq_idx));
608                 if (rc) {
609                         free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
610                         adapter->netdev->rx_cpu_rmap = NULL;
611                         return rc;
612                 }
613         }
614 #endif /* CONFIG_RFS_ACCEL */
615         return 0;
616 }
617
618 static void ena_init_io_rings_common(struct ena_adapter *adapter,
619                                      struct ena_ring *ring, u16 qid)
620 {
621         ring->qid = qid;
622         ring->pdev = adapter->pdev;
623         ring->dev = &adapter->pdev->dev;
624         ring->netdev = adapter->netdev;
625         ring->napi = &adapter->ena_napi[qid].napi;
626         ring->adapter = adapter;
627         ring->ena_dev = adapter->ena_dev;
628         ring->per_napi_packets = 0;
629         ring->cpu = 0;
630         ring->first_interrupt = false;
631         ring->no_interrupt_event_cnt = 0;
632         u64_stats_init(&ring->syncp);
633 }
634
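/* Initialize the software state of the TX rings in the given range (and
 * of the matching RX rings for non-XDP indices) from the adapter and
 * device defaults.
 */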
635 static void ena_init_io_rings(struct ena_adapter *adapter,
636                               int first_index, int count)
637 {
638         struct ena_com_dev *ena_dev;
639         struct ena_ring *txr, *rxr;
640         int i;
641
642         ena_dev = adapter->ena_dev;
643
644         for (i = first_index; i < first_index + count; i++) {
645                 txr = &adapter->tx_ring[i];
646                 rxr = &adapter->rx_ring[i];
647
648                 /* TX common ring state */
649                 ena_init_io_rings_common(adapter, txr, i);
650
651                 /* TX specific ring state */
652                 txr->ring_size = adapter->requested_tx_ring_size;
653                 txr->tx_max_header_size = ena_dev->tx_max_header_size;
654                 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
655                 txr->sgl_size = adapter->max_tx_sgl_size;
656                 txr->smoothed_interval =
657                         ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
658                 txr->disable_meta_caching = adapter->disable_meta_caching;
659
660                 /* Don't init RX queues for xdp queues */
661                 if (!ENA_IS_XDP_INDEX(adapter, i)) {
662                         /* RX common ring state */
663                         ena_init_io_rings_common(adapter, rxr, i);
664
665                         /* RX specific ring state */
666                         rxr->ring_size = adapter->requested_rx_ring_size;
667                         rxr->rx_copybreak = adapter->rx_copybreak;
668                         rxr->sgl_size = adapter->max_rx_sgl_size;
669                         rxr->smoothed_interval =
670                                 ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
671                         rxr->empty_rx_queue = 0;
672                         adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
673                 }
674         }
675 }
676
677 /* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
678  * @adapter: network interface device structure
679  * @qid: queue index
680  *
681  * Return 0 on success, negative on failure
682  */
683 static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
684 {
685         struct ena_ring *tx_ring = &adapter->tx_ring[qid];
686         struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
687         int size, i, node;
688
689         if (tx_ring->tx_buffer_info) {
690                 netif_err(adapter, ifup,
691                           adapter->netdev, "tx_buffer_info is not NULL");
692                 return -EEXIST;
693         }
694
695         size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
696         node = cpu_to_node(ena_irq->cpu);
697
698         tx_ring->tx_buffer_info = vzalloc_node(size, node);
699         if (!tx_ring->tx_buffer_info) {
700                 tx_ring->tx_buffer_info = vzalloc(size);
701                 if (!tx_ring->tx_buffer_info)
702                         goto err_tx_buffer_info;
703         }
704
705         size = sizeof(u16) * tx_ring->ring_size;
706         tx_ring->free_ids = vzalloc_node(size, node);
707         if (!tx_ring->free_ids) {
708                 tx_ring->free_ids = vzalloc(size);
709                 if (!tx_ring->free_ids)
710                         goto err_tx_free_ids;
711         }
712
713         size = tx_ring->tx_max_header_size;
714         tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
715         if (!tx_ring->push_buf_intermediate_buf) {
716                 tx_ring->push_buf_intermediate_buf = vzalloc(size);
717                 if (!tx_ring->push_buf_intermediate_buf)
718                         goto err_push_buf_intermediate_buf;
719         }
720
721         /* Req id ring for TX out of order completions */
722         for (i = 0; i < tx_ring->ring_size; i++)
723                 tx_ring->free_ids[i] = i;
724
725         /* Reset tx statistics */
726         memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
727
728         tx_ring->next_to_use = 0;
729         tx_ring->next_to_clean = 0;
730         tx_ring->cpu = ena_irq->cpu;
731         return 0;
732
733 err_push_buf_intermediate_buf:
734         vfree(tx_ring->free_ids);
735         tx_ring->free_ids = NULL;
736 err_tx_free_ids:
737         vfree(tx_ring->tx_buffer_info);
738         tx_ring->tx_buffer_info = NULL;
739 err_tx_buffer_info:
740         return -ENOMEM;
741 }
742
743 /* ena_free_tx_resources - Free I/O Tx Resources per Queue
744  * @adapter: network interface device structure
745  * @qid: queue index
746  *
747  * Free all transmit software resources
748  */
749 static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
750 {
751         struct ena_ring *tx_ring = &adapter->tx_ring[qid];
752
753         vfree(tx_ring->tx_buffer_info);
754         tx_ring->tx_buffer_info = NULL;
755
756         vfree(tx_ring->free_ids);
757         tx_ring->free_ids = NULL;
758
759         vfree(tx_ring->push_buf_intermediate_buf);
760         tx_ring->push_buf_intermediate_buf = NULL;
761 }
762
763 static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
764                                            int first_index,
765                                            int count)
766 {
767         int i, rc = 0;
768
769         for (i = first_index; i < first_index + count; i++) {
770                 rc = ena_setup_tx_resources(adapter, i);
771                 if (rc)
772                         goto err_setup_tx;
773         }
774
775         return 0;
776
777 err_setup_tx:
778
779         netif_err(adapter, ifup, adapter->netdev,
780                   "Tx queue %d: allocation failed\n", i);
781
782         /* rewind the index freeing the rings as we go */
783         while (first_index < i--)
784                 ena_free_tx_resources(adapter, i);
785         return rc;
786 }
787
788 static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
789                                                   int first_index, int count)
790 {
791         int i;
792
793         for (i = first_index; i < first_index + count; i++)
794                 ena_free_tx_resources(adapter, i);
795 }
796
797 /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
798  * @adapter: board private structure
799  *
800  * Free all transmit software resources
801  */
802 static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
803 {
804         ena_free_all_io_tx_resources_in_range(adapter,
805                                               0,
806                                               adapter->xdp_num_queues +
807                                               adapter->num_io_queues);
808 }
809
810 static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
811 {
812         if (likely(req_id < rx_ring->ring_size))
813                 return 0;
814
815         netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
816                   "Invalid rx req_id: %hu\n", req_id);
817
818         u64_stats_update_begin(&rx_ring->syncp);
819         rx_ring->rx_stats.bad_req_id++;
820         u64_stats_update_end(&rx_ring->syncp);
821
822         /* Trigger device reset */
823         rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
824         set_bit(ENA_FLAG_TRIGGER_RESET, &rx_ring->adapter->flags);
825         return -EFAULT;
826 }
827
828 /* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
829  * @adapter: network interface device structure
830  * @qid: queue index
831  *
832  * Returns 0 on success, negative on failure
833  */
834 static int ena_setup_rx_resources(struct ena_adapter *adapter,
835                                   u32 qid)
836 {
837         struct ena_ring *rx_ring = &adapter->rx_ring[qid];
838         struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
839         int size, node, i;
840
841         if (rx_ring->rx_buffer_info) {
842                 netif_err(adapter, ifup, adapter->netdev,
843                           "rx_buffer_info is not NULL");
844                 return -EEXIST;
845         }
846
847         /* alloc an extra element so that in the rx path
848          * we can always prefetch rx_info + 1
849          */
850         size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
851         node = cpu_to_node(ena_irq->cpu);
852
853         rx_ring->rx_buffer_info = vzalloc_node(size, node);
854         if (!rx_ring->rx_buffer_info) {
855                 rx_ring->rx_buffer_info = vzalloc(size);
856                 if (!rx_ring->rx_buffer_info)
857                         return -ENOMEM;
858         }
859
860         size = sizeof(u16) * rx_ring->ring_size;
861         rx_ring->free_ids = vzalloc_node(size, node);
862         if (!rx_ring->free_ids) {
863                 rx_ring->free_ids = vzalloc(size);
864                 if (!rx_ring->free_ids) {
865                         vfree(rx_ring->rx_buffer_info);
866                         rx_ring->rx_buffer_info = NULL;
867                         return -ENOMEM;
868                 }
869         }
870
871         /* Req id ring for receiving RX pkts out of order */
872         for (i = 0; i < rx_ring->ring_size; i++)
873                 rx_ring->free_ids[i] = i;
874
875         /* Reset rx statistics */
876         memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
877
878         rx_ring->next_to_clean = 0;
879         rx_ring->next_to_use = 0;
880         rx_ring->cpu = ena_irq->cpu;
881
882         return 0;
883 }
884
885 /* ena_free_rx_resources - Free I/O Rx Resources
886  * @adapter: network interface device structure
887  * @qid: queue index
888  *
889  * Free all receive software resources
890  */
891 static void ena_free_rx_resources(struct ena_adapter *adapter,
892                                   u32 qid)
893 {
894         struct ena_ring *rx_ring = &adapter->rx_ring[qid];
895
896         vfree(rx_ring->rx_buffer_info);
897         rx_ring->rx_buffer_info = NULL;
898
899         vfree(rx_ring->free_ids);
900         rx_ring->free_ids = NULL;
901 }
902
903 /* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
904  * @adapter: board private structure
905  *
906  * Return 0 on success, negative on failure
907  */
908 static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
909 {
910         int i, rc = 0;
911
912         for (i = 0; i < adapter->num_io_queues; i++) {
913                 rc = ena_setup_rx_resources(adapter, i);
914                 if (rc)
915                         goto err_setup_rx;
916         }
917
918         return 0;
919
920 err_setup_rx:
921
922         netif_err(adapter, ifup, adapter->netdev,
923                   "Rx queue %d: allocation failed\n", i);
924
925         /* rewind the index freeing the rings as we go */
926         while (i--)
927                 ena_free_rx_resources(adapter, i);
928         return rc;
929 }
930
931 /* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
932  * @adapter: board private structure
933  *
934  * Free all receive software resources
935  */
936 static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
937 {
938         int i;
939
940         for (i = 0; i < adapter->num_io_queues; i++)
941                 ena_free_rx_resources(adapter, i);
942 }
943
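/* Allocate and DMA-map a page for a single RX descriptor. The mapping is
 * bidirectional (see the SPAN port comment below), and the buffer address
 * and length reported to the device account for the ring's rx_headroom.
 */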
944 static int ena_alloc_rx_page(struct ena_ring *rx_ring,
945                                     struct ena_rx_buffer *rx_info, gfp_t gfp)
946 {
947         struct ena_com_buf *ena_buf;
948         struct page *page;
949         dma_addr_t dma;
950
951         /* if the previously allocated page has not been used yet, keep it */
952         if (unlikely(rx_info->page))
953                 return 0;
954
955         page = alloc_page(gfp);
956         if (unlikely(!page)) {
957                 u64_stats_update_begin(&rx_ring->syncp);
958                 rx_ring->rx_stats.page_alloc_fail++;
959                 u64_stats_update_end(&rx_ring->syncp);
960                 return -ENOMEM;
961         }
962
963         /* To enable NIC-side port-mirroring, AKA SPAN port,
964          * we make the buffer readable by the nic as well
965          */
966         dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
967                            DMA_BIDIRECTIONAL);
968         if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
969                 u64_stats_update_begin(&rx_ring->syncp);
970                 rx_ring->rx_stats.dma_mapping_err++;
971                 u64_stats_update_end(&rx_ring->syncp);
972
973                 __free_page(page);
974                 return -EIO;
975         }
976         netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
977                   "alloc page %p, rx_info %p\n", page, rx_info);
978
979         rx_info->page = page;
980         rx_info->page_offset = 0;
981         ena_buf = &rx_info->ena_buf;
982         ena_buf->paddr = dma + rx_ring->rx_headroom;
983         ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom;
984
985         return 0;
986 }
987
988 static void ena_free_rx_page(struct ena_ring *rx_ring,
989                              struct ena_rx_buffer *rx_info)
990 {
991         struct page *page = rx_info->page;
992         struct ena_com_buf *ena_buf = &rx_info->ena_buf;
993
994         if (unlikely(!page)) {
995                 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
996                            "Trying to free unallocated buffer\n");
997                 return;
998         }
999
1000         dma_unmap_page(rx_ring->dev, ena_buf->paddr - rx_ring->rx_headroom,
1001                        ENA_PAGE_SIZE,
1002                        DMA_BIDIRECTIONAL);
1003
1004         __free_page(page);
1005         rx_info->page = NULL;
1006 }
1007
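/* Post up to @num RX buffers to the submission queue. An allocation or
 * post failure stops the refill early and is accounted as a partial
 * refill; the doorbell is written once for the whole batch.
 */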
1008 static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
1009 {
1010         u16 next_to_use, req_id;
1011         u32 i;
1012         int rc;
1013
1014         next_to_use = rx_ring->next_to_use;
1015
1016         for (i = 0; i < num; i++) {
1017                 struct ena_rx_buffer *rx_info;
1018
1019                 req_id = rx_ring->free_ids[next_to_use];
1020
1021                 rx_info = &rx_ring->rx_buffer_info[req_id];
1022
1023                 rc = ena_alloc_rx_page(rx_ring, rx_info,
1024                                        GFP_ATOMIC | __GFP_COMP);
1025                 if (unlikely(rc < 0)) {
1026                         netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
1027                                    "failed to alloc buffer for rx queue %d\n",
1028                                    rx_ring->qid);
1029                         break;
1030                 }
1031                 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
1032                                                 &rx_info->ena_buf,
1033                                                 req_id);
1034                 if (unlikely(rc)) {
1035                         netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
1036                                    "failed to add buffer for rx queue %d\n",
1037                                    rx_ring->qid);
1038                         break;
1039                 }
1040                 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
1041                                                    rx_ring->ring_size);
1042         }
1043
1044         if (unlikely(i < num)) {
1045                 u64_stats_update_begin(&rx_ring->syncp);
1046                 rx_ring->rx_stats.refil_partial++;
1047                 u64_stats_update_end(&rx_ring->syncp);
1048                 netdev_warn(rx_ring->netdev,
1049                             "refilled rx qid %d with only %d buffers (from %d)\n",
1050                             rx_ring->qid, i, num);
1051         }
1052
1053         /* ena_com_write_sq_doorbell issues a wmb() */
1054         if (likely(i))
1055                 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
1056
1057         rx_ring->next_to_use = next_to_use;
1058
1059         return i;
1060 }
1061
1062 static void ena_free_rx_bufs(struct ena_adapter *adapter,
1063                              u32 qid)
1064 {
1065         struct ena_ring *rx_ring = &adapter->rx_ring[qid];
1066         u32 i;
1067
1068         for (i = 0; i < rx_ring->ring_size; i++) {
1069                 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
1070
1071                 if (rx_info->page)
1072                         ena_free_rx_page(rx_ring, rx_info);
1073         }
1074 }
1075
1076 /* ena_refill_all_rx_bufs - allocate all queues Rx buffers
1077  * @adapter: board private structure
1078  */
1079 static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
1080 {
1081         struct ena_ring *rx_ring;
1082         int i, rc, bufs_num;
1083
1084         for (i = 0; i < adapter->num_io_queues; i++) {
1085                 rx_ring = &adapter->rx_ring[i];
1086                 bufs_num = rx_ring->ring_size - 1;
1087                 rc = ena_refill_rx_bufs(rx_ring, bufs_num);
1088
1089                 if (unlikely(rc != bufs_num))
1090                         netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
1091                                    "refilling Queue %d failed. allocated %d buffers from: %d\n",
1092                                    i, rc, bufs_num);
1093         }
1094 }
1095
1096 static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
1097 {
1098         int i;
1099
1100         for (i = 0; i < adapter->num_io_queues; i++)
1101                 ena_free_rx_bufs(adapter, i);
1102 }
1103
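/* Undo the DMA mappings of a TX buffer: the first entry may be linear
 * data unmapped with dma_unmap_single(), the remaining entries are page
 * fragments unmapped with dma_unmap_page().
 */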
1104 static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
1105                               struct ena_tx_buffer *tx_info)
1106 {
1107         struct ena_com_buf *ena_buf;
1108         u32 cnt;
1109         int i;
1110
1111         ena_buf = tx_info->bufs;
1112         cnt = tx_info->num_of_bufs;
1113
1114         if (unlikely(!cnt))
1115                 return;
1116
1117         if (tx_info->map_linear_data) {
1118                 dma_unmap_single(tx_ring->dev,
1119                                  dma_unmap_addr(ena_buf, paddr),
1120                                  dma_unmap_len(ena_buf, len),
1121                                  DMA_TO_DEVICE);
1122                 ena_buf++;
1123                 cnt--;
1124         }
1125
1126         /* unmap remaining mapped pages */
1127         for (i = 0; i < cnt; i++) {
1128                 dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
1129                                dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
1130                 ena_buf++;
1131         }
1132 }
1133
1134 /* ena_free_tx_bufs - Free Tx Buffers per Queue
1135  * @tx_ring: TX ring whose buffers are to be freed
1136  */
1137 static void ena_free_tx_bufs(struct ena_ring *tx_ring)
1138 {
1139         bool print_once = true;
1140         u32 i;
1141
1142         for (i = 0; i < tx_ring->ring_size; i++) {
1143                 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
1144
1145                 if (!tx_info->skb)
1146                         continue;
1147
1148                 if (print_once) {
1149                         netdev_notice(tx_ring->netdev,
1150                                       "free uncompleted tx skb qid %d idx 0x%x\n",
1151                                       tx_ring->qid, i);
1152                         print_once = false;
1153                 } else {
1154                         netdev_dbg(tx_ring->netdev,
1155                                    "free uncompleted tx skb qid %d idx 0x%x\n",
1156                                    tx_ring->qid, i);
1157                 }
1158
1159                 ena_unmap_tx_buff(tx_ring, tx_info);
1160
1161                 dev_kfree_skb_any(tx_info->skb);
1162         }
1163         netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
1164                                                   tx_ring->qid));
1165 }
1166
1167 static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
1168 {
1169         struct ena_ring *tx_ring;
1170         int i;
1171
1172         for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
1173                 tx_ring = &adapter->tx_ring[i];
1174                 ena_free_tx_bufs(tx_ring);
1175         }
1176 }
1177
1178 static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
1179 {
1180         u16 ena_qid;
1181         int i;
1182
1183         for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
1184                 ena_qid = ENA_IO_TXQ_IDX(i);
1185                 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1186         }
1187 }
1188
1189 static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
1190 {
1191         u16 ena_qid;
1192         int i;
1193
1194         for (i = 0; i < adapter->num_io_queues; i++) {
1195                 ena_qid = ENA_IO_RXQ_IDX(i);
1196                 cancel_work_sync(&adapter->ena_napi[i].dim.work);
1197                 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
1198         }
1199 }
1200
1201 static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
1202 {
1203         ena_destroy_all_tx_queues(adapter);
1204         ena_destroy_all_rx_queues(adapter);
1205 }
1206
1207 static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
1208                                  struct ena_tx_buffer *tx_info, bool is_xdp)
1209 {
1210         if (tx_info)
1211                 netif_err(ring->adapter,
1212                           tx_done,
1213                           ring->netdev,
1214                           "tx_info doesn't have valid %s",
1215                            is_xdp ? "xdp frame" : "skb");
1216         else
1217                 netif_err(ring->adapter,
1218                           tx_done,
1219                           ring->netdev,
1220                           "Invalid req_id: %hu\n",
1221                           req_id);
1222
1223         u64_stats_update_begin(&ring->syncp);
1224         ring->tx_stats.bad_req_id++;
1225         u64_stats_update_end(&ring->syncp);
1226
1227         /* Trigger device reset */
1228         ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
1229         set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags);
1230         return -EFAULT;
1231 }
1232
1233 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
1234 {
1235         struct ena_tx_buffer *tx_info = NULL;
1236
1237         if (likely(req_id < tx_ring->ring_size)) {
1238                 tx_info = &tx_ring->tx_buffer_info[req_id];
1239                 if (likely(tx_info->skb))
1240                         return 0;
1241         }
1242
1243         return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
1244 }
1245
1246 static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
1247 {
1248         struct ena_tx_buffer *tx_info = NULL;
1249
1250         if (likely(req_id < xdp_ring->ring_size)) {
1251                 tx_info = &xdp_ring->tx_buffer_info[req_id];
1252                 if (likely(tx_info->xdpf))
1253                         return 0;
1254         }
1255
1256         return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
1257 }
1258
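/* TX completion handler: reap up to @budget completed packets from the
 * completion queue, unmap and free their skbs, return the descriptors to
 * the free_ids ring and wake the netdev queue once enough submission
 * queue space is available again.
 */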
1259 static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
1260 {
1261         struct netdev_queue *txq;
1262         bool above_thresh;
1263         u32 tx_bytes = 0;
1264         u32 total_done = 0;
1265         u16 next_to_clean;
1266         u16 req_id;
1267         int tx_pkts = 0;
1268         int rc;
1269
1270         next_to_clean = tx_ring->next_to_clean;
1271         txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
1272
1273         while (tx_pkts < budget) {
1274                 struct ena_tx_buffer *tx_info;
1275                 struct sk_buff *skb;
1276
1277                 rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
1278                                                 &req_id);
1279                 if (rc)
1280                         break;
1281
1282                 rc = validate_tx_req_id(tx_ring, req_id);
1283                 if (rc)
1284                         break;
1285
1286                 tx_info = &tx_ring->tx_buffer_info[req_id];
1287                 skb = tx_info->skb;
1288
1289                 /* prefetch skb_end_pointer() to speed up skb_shinfo(skb) */
1290                 prefetch(&skb->end);
1291
1292                 tx_info->skb = NULL;
1293                 tx_info->last_jiffies = 0;
1294
1295                 ena_unmap_tx_buff(tx_ring, tx_info);
1296
1297                 netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
1298                           "tx_poll: q %d skb %p completed\n", tx_ring->qid,
1299                           skb);
1300
1301                 tx_bytes += skb->len;
1302                 dev_kfree_skb(skb);
1303                 tx_pkts++;
1304                 total_done += tx_info->tx_descs;
1305
1306                 tx_ring->free_ids[next_to_clean] = req_id;
1307                 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1308                                                      tx_ring->ring_size);
1309         }
1310
1311         tx_ring->next_to_clean = next_to_clean;
1312         ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
1313         ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
1314
1315         netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
1316
1317         netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
1318                   "tx_poll: q %d done. total pkts: %d\n",
1319                   tx_ring->qid, tx_pkts);
1320
1321         /* need to make the ring's circular update visible to
1322          * ena_start_xmit() before checking for netif_queue_stopped().
1323          */
1324         smp_mb();
1325
1326         above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1327                                                     ENA_TX_WAKEUP_THRESH);
1328         if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
1329                 __netif_tx_lock(txq, smp_processor_id());
1330                 above_thresh =
1331                         ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
1332                                                      ENA_TX_WAKEUP_THRESH);
1333                 if (netif_tx_queue_stopped(txq) && above_thresh &&
1334                     test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
1335                         netif_tx_wake_queue(txq);
1336                         u64_stats_update_begin(&tx_ring->syncp);
1337                         tx_ring->tx_stats.queue_wakeup++;
1338                         u64_stats_update_end(&tx_ring->syncp);
1339                 }
1340                 __netif_tx_unlock(txq);
1341         }
1342
1343         return tx_pkts;
1344 }
1345
1346 static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags)
1347 {
1348         struct sk_buff *skb;
1349
1350         if (frags)
1351                 skb = napi_get_frags(rx_ring->napi);
1352         else
1353                 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1354                                                 rx_ring->rx_copybreak);
1355
1356         if (unlikely(!skb)) {
1357                 u64_stats_update_begin(&rx_ring->syncp);
1358                 rx_ring->rx_stats.skb_alloc_fail++;
1359                 u64_stats_update_end(&rx_ring->syncp);
1360                 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1361                           "Failed to allocate skb. frags: %d\n", frags);
1362                 return NULL;
1363         }
1364
1365         return skb;
1366 }
1367
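/* Build an skb for a received packet. Packets of up to rx_copybreak
 * bytes are copied into a small linear skb so the RX page can be reused;
 * larger packets have their pages unmapped and attached to the skb as
 * frags.
 */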
1368 static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
1369                                   struct ena_com_rx_buf_info *ena_bufs,
1370                                   u32 descs,
1371                                   u16 *next_to_clean)
1372 {
1373         struct sk_buff *skb;
1374         struct ena_rx_buffer *rx_info;
1375         u16 len, req_id, buf = 0;
1376         void *va;
1377         int rc;
1378
1379         len = ena_bufs[buf].len;
1380         req_id = ena_bufs[buf].req_id;
1381
1382         rc = validate_rx_req_id(rx_ring, req_id);
1383         if (unlikely(rc < 0))
1384                 return NULL;
1385
1386         rx_info = &rx_ring->rx_buffer_info[req_id];
1387
1388         if (unlikely(!rx_info->page)) {
1389                 netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
1390                           "Page is NULL\n");
1391                 return NULL;
1392         }
1393
1394         netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1395                   "rx_info %p page %p\n",
1396                   rx_info, rx_info->page);
1397
1398         /* save virt address of first buffer */
1399         va = page_address(rx_info->page) + rx_info->page_offset;
1400         prefetch(va + NET_IP_ALIGN);
1401
1402         if (len <= rx_ring->rx_copybreak) {
1403                 skb = ena_alloc_skb(rx_ring, false);
1404                 if (unlikely(!skb))
1405                         return NULL;
1406
1407                 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1408                           "rx allocated small packet. len %d. data_len %d\n",
1409                           skb->len, skb->data_len);
1410
1411                 /* sync this buffer for CPU use */
1412                 dma_sync_single_for_cpu(rx_ring->dev,
1413                                         dma_unmap_addr(&rx_info->ena_buf, paddr),
1414                                         len,
1415                                         DMA_FROM_DEVICE);
1416                 skb_copy_to_linear_data(skb, va, len);
1417                 dma_sync_single_for_device(rx_ring->dev,
1418                                            dma_unmap_addr(&rx_info->ena_buf, paddr),
1419                                            len,
1420                                            DMA_FROM_DEVICE);
1421
1422                 skb_put(skb, len);
1423                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1424                 rx_ring->free_ids[*next_to_clean] = req_id;
1425                 *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
1426                                                      rx_ring->ring_size);
1427                 return skb;
1428         }
1429
1430         skb = ena_alloc_skb(rx_ring, true);
1431         if (unlikely(!skb))
1432                 return NULL;
1433
1434         do {
1435                 dma_unmap_page(rx_ring->dev,
1436                                dma_unmap_addr(&rx_info->ena_buf, paddr),
1437                                ENA_PAGE_SIZE, DMA_BIDIRECTIONAL);
1438
1439                 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
1440                                 rx_info->page_offset, len, ENA_PAGE_SIZE);
1441                 /* The offset is non-zero only for the first buffer */
1442                 rx_info->page_offset = 0;
1443
1444                 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1445                           "rx skb updated. len %d. data_len %d\n",
1446                           skb->len, skb->data_len);
1447
1448                 rx_info->page = NULL;
1449
1450                 rx_ring->free_ids[*next_to_clean] = req_id;
1451                 *next_to_clean =
1452                         ENA_RX_RING_IDX_NEXT(*next_to_clean,
1453                                              rx_ring->ring_size);
1454                 if (likely(--descs == 0))
1455                         break;
1456
1457                 buf++;
1458                 len = ena_bufs[buf].len;
1459                 req_id = ena_bufs[buf].req_id;
1460
1461                 rc = validate_rx_req_id(rx_ring, req_id);
1462                 if (unlikely(rc < 0))
1463                         return NULL;
1464
1465                 rx_info = &rx_ring->rx_buffer_info[req_id];
1466         } while (1);
1467
1468         return skb;
1469 }
1470
1471 /* ena_rx_checksum - indicate in skb if hw indicated a good cksum
1472  * @rx_ring: the ring from which the packet was received
1473  * @ena_rx_ctx: received packet context/metadata
1474  * @skb: skb currently being received and modified
1475  */
1476 static void ena_rx_checksum(struct ena_ring *rx_ring,
1477                                    struct ena_com_rx_ctx *ena_rx_ctx,
1478                                    struct sk_buff *skb)
1479 {
1480         /* Rx csum disabled */
1481         if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
1482                 skb->ip_summed = CHECKSUM_NONE;
1483                 return;
1484         }
1485
1486         /* For fragmented packets the checksum isn't valid */
1487         if (ena_rx_ctx->frag) {
1488                 skb->ip_summed = CHECKSUM_NONE;
1489                 return;
1490         }
1491
1492         /* if IPv4 and the IP header checksum is reported bad */
1493         if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1494                      (ena_rx_ctx->l3_csum_err))) {
1495                 /* ipv4 checksum error */
1496                 skb->ip_summed = CHECKSUM_NONE;
1497                 u64_stats_update_begin(&rx_ring->syncp);
1498                 rx_ring->rx_stats.bad_csum++;
1499                 u64_stats_update_end(&rx_ring->syncp);
1500                 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1501                           "RX IPv4 header checksum error\n");
1502                 return;
1503         }
1504
1505         /* if TCP/UDP */
1506         if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1507                    (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
1508                 if (unlikely(ena_rx_ctx->l4_csum_err)) {
1509                         /* TCP/UDP checksum error */
1510                         u64_stats_update_begin(&rx_ring->syncp);
1511                         rx_ring->rx_stats.bad_csum++;
1512                         u64_stats_update_end(&rx_ring->syncp);
1513                         netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1514                                   "RX L4 checksum error\n");
1515                         skb->ip_summed = CHECKSUM_NONE;
1516                         return;
1517                 }
1518
1519                 if (likely(ena_rx_ctx->l4_csum_checked)) {
1520                         skb->ip_summed = CHECKSUM_UNNECESSARY;
1521                         u64_stats_update_begin(&rx_ring->syncp);
1522                         rx_ring->rx_stats.csum_good++;
1523                         u64_stats_update_end(&rx_ring->syncp);
1524                 } else {
1525                         u64_stats_update_begin(&rx_ring->syncp);
1526                         rx_ring->rx_stats.csum_unchecked++;
1527                         u64_stats_update_end(&rx_ring->syncp);
1528                         skb->ip_summed = CHECKSUM_NONE;
1529                 }
1530         } else {
1531                 skb->ip_summed = CHECKSUM_NONE;
1532                 return;
1533         }
1534
1535 }
1536
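/* ena_set_rx_hash - set the skb hash from the RSS hash reported by the device
 * @rx_ring: the ring from which the packet was received
 * @ena_rx_ctx: received packet context/metadata
 * @skb: skb to update
 *
 * The hash is marked as an L4 hash for unfragmented TCP/UDP packets and as an
 * unknown-type hash otherwise, and only when NETIF_F_RXHASH is enabled.
 */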
1537 static void ena_set_rx_hash(struct ena_ring *rx_ring,
1538                             struct ena_com_rx_ctx *ena_rx_ctx,
1539                             struct sk_buff *skb)
1540 {
1541         enum pkt_hash_types hash_type;
1542
1543         if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
1544                 if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1545                            (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
1546
1547                         hash_type = PKT_HASH_TYPE_L4;
1548                 else
1549                         hash_type = PKT_HASH_TYPE_NONE;
1550
1551                 /* Override hash type if the packet is fragmented */
1552                 if (ena_rx_ctx->frag)
1553                         hash_type = PKT_HASH_TYPE_NONE;
1554
1555                 skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
1556         }
1557 }
1558
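/* ena_xdp_handle_buff - run the attached XDP program on the first Rx buffer
 * @rx_ring: the ring on which the packet was received
 * @xdp: xdp_buff to initialize from the Rx buffer
 *
 * Packets larger than ENA_XDP_MAX_MTU are dropped. On XDP_PASS the buffer
 * offset and length are updated in case the program adjusted the headers.
 * Returns the XDP verdict.
 */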
1559 static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
1560 {
1561         struct ena_rx_buffer *rx_info;
1562         int ret;
1563
1564         rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1565         xdp->data = page_address(rx_info->page) +
1566                 rx_info->page_offset + rx_ring->rx_headroom;
1567         xdp_set_data_meta_invalid(xdp);
1568         xdp->data_hard_start = page_address(rx_info->page);
1569         xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len;
1570         /* If for some reason we received a bigger packet than
1571          * we expected, simply drop it
1572          */
1573         if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
1574                 return XDP_DROP;
1575
1576         ret = ena_xdp_execute(rx_ring, xdp, rx_info);
1577
1578         /* The xdp program might expand the headers */
1579         if (ret == XDP_PASS) {
1580                 rx_info->page_offset = xdp->data - xdp->data_hard_start;
1581                 rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
1582         }
1583
1584         return ret;
1585 }
1586 /* ena_clean_rx_irq - Cleanup RX irq
1587  * @rx_ring: RX ring to clean
1588  * @napi: napi handler
1589  * @budget: how many packets the driver is allowed to clean
1590  *
1591  * Returns the number of cleaned buffers.
1592  */
1593 static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
1594                             u32 budget)
1595 {
1596         u16 next_to_clean = rx_ring->next_to_clean;
1597         struct ena_com_rx_ctx ena_rx_ctx;
1598         struct ena_rx_buffer *rx_info;
1599         struct ena_adapter *adapter;
1600         u32 res_budget, work_done;
1601         int rx_copybreak_pkt = 0;
1602         int refill_threshold;
1603         struct sk_buff *skb;
1604         int refill_required;
1605         struct xdp_buff xdp;
1606         int total_len = 0;
1607         int xdp_verdict;
1608         int rc = 0;
1609         int i;
1610
1611         netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1612                   "%s qid %d\n", __func__, rx_ring->qid);
1613         res_budget = budget;
1614         xdp.rxq = &rx_ring->xdp_rxq;
1615         xdp.frame_sz = ENA_PAGE_SIZE;
1616
1617         do {
1618                 xdp_verdict = XDP_PASS;
1619                 skb = NULL;
1620                 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1621                 ena_rx_ctx.max_bufs = rx_ring->sgl_size;
1622                 ena_rx_ctx.descs = 0;
1623                 ena_rx_ctx.pkt_offset = 0;
1624                 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
1625                                     rx_ring->ena_com_io_sq,
1626                                     &ena_rx_ctx);
1627                 if (unlikely(rc))
1628                         goto error;
1629
1630                 if (unlikely(ena_rx_ctx.descs == 0))
1631                         break;
1632
1633                 rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1634                 rx_info->page_offset = ena_rx_ctx.pkt_offset;
1635
1636                 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1637                           "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1638                           rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1639                           ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1640
1641                 if (ena_xdp_present_ring(rx_ring))
1642                         xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
1643
1644                 /* allocate skb and fill it */
1645                 if (xdp_verdict == XDP_PASS)
1646                         skb = ena_rx_skb(rx_ring,
1647                                          rx_ring->ena_bufs,
1648                                          ena_rx_ctx.descs,
1649                                          &next_to_clean);
1650
1651                 if (unlikely(!skb)) {
1652                         if (xdp_verdict == XDP_TX)
1653                                 ena_free_rx_page(rx_ring,
1654                                                  &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]);
1655                         for (i = 0; i < ena_rx_ctx.descs; i++) {
1656                                 rx_ring->free_ids[next_to_clean] =
1657                                         rx_ring->ena_bufs[i].req_id;
1658                                 next_to_clean =
1659                                         ENA_RX_RING_IDX_NEXT(next_to_clean,
1660                                                              rx_ring->ring_size);
1661                         }
1662                         if (xdp_verdict != XDP_PASS) {
1663                                 res_budget--;
1664                                 continue;
1665                         }
1666                         break;
1667                 }
1668
1669                 ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
1670
1671                 ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
1672
1673                 skb_record_rx_queue(skb, rx_ring->qid);
1674
1675                 if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) {
1676                         total_len += rx_ring->ena_bufs[0].len;
1677                         rx_copybreak_pkt++;
1678                         napi_gro_receive(napi, skb);
1679                 } else {
1680                         total_len += skb->len;
1681                         napi_gro_frags(napi);
1682                 }
1683
1684                 res_budget--;
1685         } while (likely(res_budget));
1686
1687         work_done = budget - res_budget;
1688         rx_ring->per_napi_packets += work_done;
1689         u64_stats_update_begin(&rx_ring->syncp);
1690         rx_ring->rx_stats.bytes += total_len;
1691         rx_ring->rx_stats.cnt += work_done;
1692         rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
1693         u64_stats_update_end(&rx_ring->syncp);
1694
1695         rx_ring->next_to_clean = next_to_clean;
1696
1697         refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
1698         refill_threshold =
1699                 min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
1700                       ENA_RX_REFILL_THRESH_PACKET);
1701
1702         /* Optimization, try to batch new rx buffers */
1703         if (refill_required > refill_threshold) {
1704                 ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
1705                 ena_refill_rx_bufs(rx_ring, refill_required);
1706         }
1707
1708         return work_done;
1709
1710 error:
1711         adapter = netdev_priv(rx_ring->netdev);
1712
1713         u64_stats_update_begin(&rx_ring->syncp);
1714         rx_ring->rx_stats.bad_desc_num++;
1715         u64_stats_update_end(&rx_ring->syncp);
1716
1717         /* Too many desc from the device. Trigger reset */
1718         adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
1719         set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
1720
1721         return 0;
1722 }
1723
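/* ena_dim_work - apply the Rx interrupt moderation interval chosen by net_dim
 * to the ring and restart the DIM measurement
 */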
1724 static void ena_dim_work(struct work_struct *w)
1725 {
1726         struct dim *dim = container_of(w, struct dim, work);
1727         struct dim_cq_moder cur_moder =
1728                 net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
1729         struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
1730
1731         ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
1732         dim->state = DIM_START_MEASURE;
1733 }
1734
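/* Feed the ring's Rx packet/byte counters into the DIM algorithm so it can
 * select a new moderation profile (applied later by ena_dim_work()).
 */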
1735 static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
1736 {
1737         struct dim_sample dim_sample;
1738         struct ena_ring *rx_ring = ena_napi->rx_ring;
1739
1740         if (!rx_ring->per_napi_packets)
1741                 return;
1742
1743         rx_ring->non_empty_napi_events++;
1744
1745         dim_update_sample(rx_ring->non_empty_napi_events,
1746                           rx_ring->rx_stats.cnt,
1747                           rx_ring->rx_stats.bytes,
1748                           &dim_sample);
1749
1750         net_dim(&ena_napi->dim, dim_sample);
1751
1752         rx_ring->per_napi_packets = 0;
1753 }
1754
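/* ena_unmask_interrupt - update the interrupt moderation intervals and unmask
 * the queue pair's interrupt
 * @tx_ring: TX ring of the queue pair
 * @rx_ring: RX ring of the queue pair (NULL for XDP TX queues)
 */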
1755 static void ena_unmask_interrupt(struct ena_ring *tx_ring,
1756                                         struct ena_ring *rx_ring)
1757 {
1758         struct ena_eth_io_intr_reg intr_reg;
1759         u32 rx_interval = 0;
1760         /* rx_ring can be NULL for XDP TX queues, which don't have an
1761          * accompanying rx_ring pair.
1762          */
1763         if (rx_ring)
1764                 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
1765                         rx_ring->smoothed_interval :
1766                         ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
1767
1768         /* Update intr register: rx intr delay,
1769          * tx intr delay and interrupt unmask
1770          */
1771         ena_com_update_intr_reg(&intr_reg,
1772                                 rx_interval,
1773                                 tx_ring->smoothed_interval,
1774                                 true);
1775
1776         u64_stats_update_begin(&tx_ring->syncp);
1777         tx_ring->tx_stats.unmask_interrupt++;
1778         u64_stats_update_end(&tx_ring->syncp);
1779         /* The MSI-X vector is shared: both the Tx and Rx CQs point to it,
1780          * so either one can be used to reach the interrupt register.
1781          * The Tx CQ is used here because the rx_ring is NULL for XDP
1782          * queues.
1783          */
1784         ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
1785 }
1786
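/* If the rings are no longer running on the CPU they were assigned to, update
 * the preferred NUMA node of both completion queues accordingly.
 */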
1787 static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1788                                              struct ena_ring *rx_ring)
1789 {
1790         int cpu = get_cpu();
1791         int numa_node;
1792
1793         /* Check only one ring since the 2 rings are running on the same cpu */
1794         if (likely(tx_ring->cpu == cpu))
1795                 goto out;
1796
1797         numa_node = cpu_to_node(cpu);
1798         put_cpu();
1799
1800         if (numa_node != NUMA_NO_NODE) {
1801                 ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1802                 if (rx_ring)
1803                         ena_com_update_numa_node(rx_ring->ena_com_io_cq,
1804                                                  numa_node);
1805         }
1806
1807         tx_ring->cpu = cpu;
1808         if (rx_ring)
1809                 rx_ring->cpu = cpu;
1810
1811         return;
1812 out:
1813         put_cpu();
1814 }
1815
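/* ena_clean_xdp_irq - reclaim completed XDP TX descriptors
 * @xdp_ring: XDP TX ring to clean
 * @budget: maximum number of packets to clean
 *
 * Unmaps the transmitted buffers, frees their pages and returns the number of
 * completed packets.
 */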
1816 static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
1817 {
1818         u32 total_done = 0;
1819         u16 next_to_clean;
1820         u32 tx_bytes = 0;
1821         int tx_pkts = 0;
1822         u16 req_id;
1823         int rc;
1824
1825         if (unlikely(!xdp_ring))
1826                 return 0;
1827         next_to_clean = xdp_ring->next_to_clean;
1828
1829         while (tx_pkts < budget) {
1830                 struct ena_tx_buffer *tx_info;
1831                 struct xdp_frame *xdpf;
1832
1833                 rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
1834                                                 &req_id);
1835                 if (rc)
1836                         break;
1837
1838                 rc = validate_xdp_req_id(xdp_ring, req_id);
1839                 if (rc)
1840                         break;
1841
1842                 tx_info = &xdp_ring->tx_buffer_info[req_id];
1843                 xdpf = tx_info->xdpf;
1844
1845                 tx_info->xdpf = NULL;
1846                 tx_info->last_jiffies = 0;
1847                 ena_unmap_tx_buff(xdp_ring, tx_info);
1848
1849                 netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
1850                           "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
1851                           xdpf);
1852
1853                 tx_bytes += xdpf->len;
1854                 tx_pkts++;
1855                 total_done += tx_info->tx_descs;
1856
1857                 __free_page(tx_info->xdp_rx_page);
1858                 xdp_ring->free_ids[next_to_clean] = req_id;
1859                 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
1860                                                      xdp_ring->ring_size);
1861         }
1862
1863         xdp_ring->next_to_clean = next_to_clean;
1864         ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
1865         ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
1866
1867         netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
1868                   "tx_poll: q %d done. total pkts: %d\n",
1869                   xdp_ring->qid, tx_pkts);
1870
1871         return tx_pkts;
1872 }
1873
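/* ena_io_poll - NAPI poll handler for a TX/RX queue pair
 * @napi: napi handler
 * @budget: how many RX packets the driver is allowed to clean
 *
 * Cleans TX completions and RX packets; when both stay below their budgets,
 * the NAPI is completed and the queue pair's interrupt is unmasked.
 */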
1874 static int ena_io_poll(struct napi_struct *napi, int budget)
1875 {
1876         struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
1877         struct ena_ring *tx_ring, *rx_ring;
1878         int tx_work_done;
1879         int rx_work_done = 0;
1880         int tx_budget;
1881         int napi_comp_call = 0;
1882         int ret;
1883
1884         tx_ring = ena_napi->tx_ring;
1885         rx_ring = ena_napi->rx_ring;
1886
1887         tx_ring->first_interrupt = ena_napi->first_interrupt;
1888         rx_ring->first_interrupt = ena_napi->first_interrupt;
1889
1890         tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
1891
1892         if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1893             test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
1894                 napi_complete_done(napi, 0);
1895                 return 0;
1896         }
1897
1898         tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
1899         /* On netpoll the budget is zero and the handler should only clean the
1900          * tx completions.
1901          */
1902         if (likely(budget))
1903                 rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
1904
1905         /* If the device is about to reset or is down, avoid unmasking
1906          * the interrupt and return 0 so NAPI won't reschedule.
1907          */
1908         if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1909                      test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
1910                 napi_complete_done(napi, 0);
1911                 ret = 0;
1912
1913         } else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
1914                 napi_comp_call = 1;
1915
1916                 /* Update the NUMA node and unmask the interrupt only when
1917                  * scheduled from interrupt context (vs from sk_busy_loop).
1918                  */
1919                 if (napi_complete_done(napi, rx_work_done) &&
1920                     READ_ONCE(ena_napi->interrupts_masked)) {
1921                         smp_rmb(); /* make sure interrupts_masked is read */
1922                         WRITE_ONCE(ena_napi->interrupts_masked, false);
1923                         /* We apply adaptive moderation on Rx path only.
1924                          * Tx uses static interrupt moderation.
1925                          */
1926                         if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
1927                                 ena_adjust_adaptive_rx_intr_moderation(ena_napi);
1928
1929                         ena_unmask_interrupt(tx_ring, rx_ring);
1930                 }
1931
1932                 ena_update_ring_numa_node(tx_ring, rx_ring);
1933
1934                 ret = rx_work_done;
1935         } else {
1936                 ret = budget;
1937         }
1938
1939         u64_stats_update_begin(&tx_ring->syncp);
1940         tx_ring->tx_stats.napi_comp += napi_comp_call;
1941         tx_ring->tx_stats.tx_poll++;
1942         u64_stats_update_end(&tx_ring->syncp);
1943
1944         return ret;
1945 }
1946
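/* ena_intr_msix_mgmnt - MSI-X Interrupt Handler for the management vector
 * (admin queue completions and AENQ notifications)
 * @irq: interrupt number
 * @data: pointer to the adapter's private data structure
 */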
1947 static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
1948 {
1949         struct ena_adapter *adapter = (struct ena_adapter *)data;
1950
1951         ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1952
1953         /* Don't call the aenq handler before probe is done */
1954         if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
1955                 ena_com_aenq_intr_handler(adapter->ena_dev, data);
1956
1957         return IRQ_HANDLED;
1958 }
1959
1960 /* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
1961  * @irq: interrupt number
1962  * @data: pointer to the queue pair's ena_napi structure
1963  */
1964 static irqreturn_t ena_intr_msix_io(int irq, void *data)
1965 {
1966         struct ena_napi *ena_napi = data;
1967
1968         ena_napi->first_interrupt = true;
1969
1970         WRITE_ONCE(ena_napi->interrupts_masked, true);
1971         smp_wmb(); /* write interrupts_masked before calling napi */
1972
1973         napi_schedule_irqoff(&ena_napi->napi);
1974
1975         return IRQ_HANDLED;
1976 }
1977
1978 /* Reserve a single MSI-X vector for management (admin + AENQ), plus one
1979  * vector for each potential I/O queue.
1980  * The number of potential I/O queues is the minimum of what the device
1981  * supports and the number of vCPUs.
1982  */
1983 static int ena_enable_msix(struct ena_adapter *adapter)
1984 {
1985         int msix_vecs, irq_cnt;
1986
1987         if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1988                 netif_err(adapter, probe, adapter->netdev,
1989                           "Error, MSI-X is already enabled\n");
1990                 return -EPERM;
1991         }
1992
1993         /* Reserve the maximum number of MSI-X vectors we might need */
1994         msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1995         netif_dbg(adapter, probe, adapter->netdev,
1996                   "trying to enable MSI-X, vectors %d\n", msix_vecs);
1997
1998         irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
1999                                         msix_vecs, PCI_IRQ_MSIX);
2000
2001         if (irq_cnt < 0) {
2002                 netif_err(adapter, probe, adapter->netdev,
2003                           "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
2004                 return -ENOSPC;
2005         }
2006
2007         if (irq_cnt != msix_vecs) {
2008                 netif_notice(adapter, probe, adapter->netdev,
2009                              "enable only %d MSI-X (out of %d), reduce the number of queues\n",
2010                              irq_cnt, msix_vecs);
2011                 adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
2012         }
2013
2014         if (ena_init_rx_cpu_rmap(adapter))
2015                 netif_warn(adapter, probe, adapter->netdev,
2016                            "Failed to map IRQs to CPUs\n");
2017
2018         adapter->msix_vecs = irq_cnt;
2019         set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
2020
2021         return 0;
2022 }
2023
2024 static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
2025 {
2026         u32 cpu;
2027
2028         snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
2029                  ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
2030                  pci_name(adapter->pdev));
2031         adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
2032                 ena_intr_msix_mgmnt;
2033         adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
2034         adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
2035                 pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
2036         cpu = cpumask_first(cpu_online_mask);
2037         adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
2038         cpumask_set_cpu(cpu,
2039                         &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
2040 }
2041
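/* Fill the IRQ table entry for each I/O queue pair: name, handler, data,
 * vector and the CPU used as an affinity hint (queues are spread across the
 * online CPUs).
 */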
2042 static void ena_setup_io_intr(struct ena_adapter *adapter)
2043 {
2044         struct net_device *netdev;
2045         int irq_idx, i, cpu;
2046         int io_queue_count;
2047
2048         netdev = adapter->netdev;
2049         io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2050
2051         for (i = 0; i < io_queue_count; i++) {
2052                 irq_idx = ENA_IO_IRQ_IDX(i);
2053                 cpu = i % num_online_cpus();
2054
2055                 snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
2056                          "%s-Tx-Rx-%d", netdev->name, i);
2057                 adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
2058                 adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
2059                 adapter->irq_tbl[irq_idx].vector =
2060                         pci_irq_vector(adapter->pdev, irq_idx);
2061                 adapter->irq_tbl[irq_idx].cpu = cpu;
2062
2063                 cpumask_set_cpu(cpu,
2064                                 &adapter->irq_tbl[irq_idx].affinity_hint_mask);
2065         }
2066 }
2067
2068 static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
2069 {
2070         unsigned long flags = 0;
2071         struct ena_irq *irq;
2072         int rc;
2073
2074         irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2075         rc = request_irq(irq->vector, irq->handler, flags, irq->name,
2076                          irq->data);
2077         if (rc) {
2078                 netif_err(adapter, probe, adapter->netdev,
2079                           "failed to request admin irq\n");
2080                 return rc;
2081         }
2082
2083         netif_dbg(adapter, probe, adapter->netdev,
2084                   "set affinity hint of mgmnt irq to 0x%lx (irq vector: %d)\n",
2085                   irq->affinity_hint_mask.bits[0], irq->vector);
2086
2087         irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
2088
2089         return rc;
2090 }
2091
2092 static int ena_request_io_irq(struct ena_adapter *adapter)
2093 {
2094         u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2095         unsigned long flags = 0;
2096         struct ena_irq *irq;
2097         int rc = 0, i, k;
2098
2099         if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
2100                 netif_err(adapter, ifup, adapter->netdev,
2101                           "Failed to request I/O IRQ: MSI-X is not enabled\n");
2102                 return -EINVAL;
2103         }
2104
2105         for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
2106                 irq = &adapter->irq_tbl[i];
2107                 rc = request_irq(irq->vector, irq->handler, flags, irq->name,
2108                                  irq->data);
2109                 if (rc) {
2110                         netif_err(adapter, ifup, adapter->netdev,
2111                                   "Failed to request I/O IRQ. index %d rc %d\n",
2112                                    i, rc);
2113                         goto err;
2114                 }
2115
2116                 netif_dbg(adapter, ifup, adapter->netdev,
2117                           "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
2118                           i, irq->affinity_hint_mask.bits[0], irq->vector);
2119
2120                 irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
2121         }
2122
2123         return rc;
2124
2125 err:
2126         for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
2127                 irq = &adapter->irq_tbl[k];
2128                 free_irq(irq->vector, irq->data);
2129         }
2130
2131         return rc;
2132 }
2133
2134 static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
2135 {
2136         struct ena_irq *irq;
2137
2138         irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
2139         synchronize_irq(irq->vector);
2140         irq_set_affinity_hint(irq->vector, NULL);
2141         free_irq(irq->vector, irq->data);
2142 }
2143
2144 static void ena_free_io_irq(struct ena_adapter *adapter)
2145 {
2146         u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2147         struct ena_irq *irq;
2148         int i;
2149
2150 #ifdef CONFIG_RFS_ACCEL
2151         if (adapter->msix_vecs >= 1) {
2152                 free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
2153                 adapter->netdev->rx_cpu_rmap = NULL;
2154         }
2155 #endif /* CONFIG_RFS_ACCEL */
2156
2157         for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
2158                 irq = &adapter->irq_tbl[i];
2159                 irq_set_affinity_hint(irq->vector, NULL);
2160                 free_irq(irq->vector, irq->data);
2161         }
2162 }
2163
2164 static void ena_disable_msix(struct ena_adapter *adapter)
2165 {
2166         if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
2167                 pci_free_irq_vectors(adapter->pdev);
2168 }
2169
2170 static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
2171 {
2172         u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2173         int i;
2174
2175         if (!netif_running(adapter->netdev))
2176                 return;
2177
2178         for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
2179                 synchronize_irq(adapter->irq_tbl[i].vector);
2180 }
2181
2182 static void ena_del_napi_in_range(struct ena_adapter *adapter,
2183                                   int first_index,
2184                                   int count)
2185 {
2186         int i;
2187
2188         for (i = first_index; i < first_index + count; i++) {
2189                 /* Check if napi was initialized before */
2190                 if (!ENA_IS_XDP_INDEX(adapter, i) ||
2191                     adapter->ena_napi[i].xdp_ring)
2192                         netif_napi_del(&adapter->ena_napi[i].napi);
2193                 else
2194                         WARN_ON(ENA_IS_XDP_INDEX(adapter, i) &&
2195                                 adapter->ena_napi[i].xdp_ring);
2196         }
2197 }
2198
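/* Register a NAPI instance for each queue in the range, using ena_xdp_io_poll
 * for XDP TX queues and ena_io_poll for regular TX/RX queue pairs.
 */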
2199 static void ena_init_napi_in_range(struct ena_adapter *adapter,
2200                                    int first_index, int count)
2201 {
2202         int i;
2203
2204         for (i = first_index; i < first_index + count; i++) {
2205                 struct ena_napi *napi = &adapter->ena_napi[i];
2206
2207                 netif_napi_add(adapter->netdev,
2208                                &napi->napi,
2209                                ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll,
2210                                ENA_NAPI_BUDGET);
2211
2212                 if (!ENA_IS_XDP_INDEX(adapter, i)) {
2213                         napi->rx_ring = &adapter->rx_ring[i];
2214                         napi->tx_ring = &adapter->tx_ring[i];
2215                 } else {
2216                         napi->xdp_ring = &adapter->tx_ring[i];
2217                 }
2218                 napi->qid = i;
2219         }
2220 }
2221
2222 static void ena_napi_disable_in_range(struct ena_adapter *adapter,
2223                                       int first_index,
2224                                       int count)
2225 {
2226         int i;
2227
2228         for (i = first_index; i < first_index + count; i++)
2229                 napi_disable(&adapter->ena_napi[i].napi);
2230 }
2231
2232 static void ena_napi_enable_in_range(struct ena_adapter *adapter,
2233                                      int first_index,
2234                                      int count)
2235 {
2236         int i;
2237
2238         for (i = first_index; i < first_index + count; i++)
2239                 napi_enable(&adapter->ena_napi[i].napi);
2240 }
2241
2242 /* Configure Rx forwarding (RSS): indirection table, hash function and hash inputs */
2243 static int ena_rss_configure(struct ena_adapter *adapter)
2244 {
2245         struct ena_com_dev *ena_dev = adapter->ena_dev;
2246         int rc;
2247
2248         /* In case the RSS table wasn't initialized by probe */
2249         if (!ena_dev->rss.tbl_log_size) {
2250                 rc = ena_rss_init_default(adapter);
2251                 if (rc && (rc != -EOPNOTSUPP)) {
2252                         netif_err(adapter, ifup, adapter->netdev,
2253                                   "Failed to init RSS rc: %d\n", rc);
2254                         return rc;
2255                 }
2256         }
2257
2258         /* Set indirect table */
2259         rc = ena_com_indirect_table_set(ena_dev);
2260         if (unlikely(rc && rc != -EOPNOTSUPP))
2261                 return rc;
2262
2263         /* Configure hash function (if supported) */
2264         rc = ena_com_set_hash_function(ena_dev);
2265         if (unlikely(rc && (rc != -EOPNOTSUPP)))
2266                 return rc;
2267
2268         /* Configure hash inputs (if supported) */
2269         rc = ena_com_set_hash_ctrl(ena_dev);
2270         if (unlikely(rc && (rc != -EOPNOTSUPP)))
2271                 return rc;
2272
2273         return 0;
2274 }
2275
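/* Finish bringing the interface up: configure RSS, apply the current MTU,
 * refill all RX rings, start the TX queues and enable NAPI.
 */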
2276 static int ena_up_complete(struct ena_adapter *adapter)
2277 {
2278         int rc;
2279
2280         rc = ena_rss_configure(adapter);
2281         if (rc)
2282                 return rc;
2283
2284         ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
2285
2286         ena_refill_all_rx_bufs(adapter);
2287
2288         /* enable transmits */
2289         netif_tx_start_all_queues(adapter->netdev);
2290
2291         ena_napi_enable_in_range(adapter,
2292                                  0,
2293                                  adapter->xdp_num_queues + adapter->num_io_queues);
2294
2295         return 0;
2296 }
2297
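/* Create the device-side I/O TX queue for qid and fetch its SQ/CQ handles */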
2298 static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
2299 {
2300         struct ena_com_create_io_ctx ctx;
2301         struct ena_com_dev *ena_dev;
2302         struct ena_ring *tx_ring;
2303         u32 msix_vector;
2304         u16 ena_qid;
2305         int rc;
2306
2307         ena_dev = adapter->ena_dev;
2308
2309         tx_ring = &adapter->tx_ring[qid];
2310         msix_vector = ENA_IO_IRQ_IDX(qid);
2311         ena_qid = ENA_IO_TXQ_IDX(qid);
2312
2313         memset(&ctx, 0x0, sizeof(ctx));
2314
2315         ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
2316         ctx.qid = ena_qid;
2317         ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
2318         ctx.msix_vector = msix_vector;
2319         ctx.queue_size = tx_ring->ring_size;
2320         ctx.numa_node = cpu_to_node(tx_ring->cpu);
2321
2322         rc = ena_com_create_io_queue(ena_dev, &ctx);
2323         if (rc) {
2324                 netif_err(adapter, ifup, adapter->netdev,
2325                           "Failed to create I/O TX queue num %d rc: %d\n",
2326                           qid, rc);
2327                 return rc;
2328         }
2329
2330         rc = ena_com_get_io_handlers(ena_dev, ena_qid,
2331                                      &tx_ring->ena_com_io_sq,
2332                                      &tx_ring->ena_com_io_cq);
2333         if (rc) {
2334                 netif_err(adapter, ifup, adapter->netdev,
2335                           "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
2336                           qid, rc);
2337                 ena_com_destroy_io_queue(ena_dev, ena_qid);
2338                 return rc;
2339         }
2340
2341         ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
2342         return rc;
2343 }
2344
2345 static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
2346                                             int first_index, int count)
2347 {
2348         struct ena_com_dev *ena_dev = adapter->ena_dev;
2349         int rc, i;
2350
2351         for (i = first_index; i < first_index + count; i++) {
2352                 rc = ena_create_io_tx_queue(adapter, i);
2353                 if (rc)
2354                         goto create_err;
2355         }
2356
2357         return 0;
2358
2359 create_err:
2360         while (i-- > first_index)
2361                 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
2362
2363         return rc;
2364 }
2365
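/* Create the device-side I/O RX queue for qid and fetch its SQ/CQ handles */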
2366 static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
2367 {
2368         struct ena_com_dev *ena_dev;
2369         struct ena_com_create_io_ctx ctx;
2370         struct ena_ring *rx_ring;
2371         u32 msix_vector;
2372         u16 ena_qid;
2373         int rc;
2374
2375         ena_dev = adapter->ena_dev;
2376
2377         rx_ring = &adapter->rx_ring[qid];
2378         msix_vector = ENA_IO_IRQ_IDX(qid);
2379         ena_qid = ENA_IO_RXQ_IDX(qid);
2380
2381         memset(&ctx, 0x0, sizeof(ctx));
2382
2383         ctx.qid = ena_qid;
2384         ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
2385         ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
2386         ctx.msix_vector = msix_vector;
2387         ctx.queue_size = rx_ring->ring_size;
2388         ctx.numa_node = cpu_to_node(rx_ring->cpu);
2389
2390         rc = ena_com_create_io_queue(ena_dev, &ctx);
2391         if (rc) {
2392                 netif_err(adapter, ifup, adapter->netdev,
2393                           "Failed to create I/O RX queue num %d rc: %d\n",
2394                           qid, rc);
2395                 return rc;
2396         }
2397
2398         rc = ena_com_get_io_handlers(ena_dev, ena_qid,
2399                                      &rx_ring->ena_com_io_sq,
2400                                      &rx_ring->ena_com_io_cq);
2401         if (rc) {
2402                 netif_err(adapter, ifup, adapter->netdev,
2403                           "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
2404                           qid, rc);
2405                 goto err;
2406         }
2407
2408         ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
2409
2410         return rc;
2411 err:
2412         ena_com_destroy_io_queue(ena_dev, ena_qid);
2413         return rc;
2414 }
2415
2416 static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
2417 {
2418         struct ena_com_dev *ena_dev = adapter->ena_dev;
2419         int rc, i;
2420
2421         for (i = 0; i < adapter->num_io_queues; i++) {
2422                 rc = ena_create_io_rx_queue(adapter, i);
2423                 if (rc)
2424                         goto create_err;
2425                 INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
2426         }
2427
2428         return 0;
2429
2430 create_err:
2431         while (i--) {
2432                 cancel_work_sync(&adapter->ena_napi[i].dim.work);
2433                 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
2434         }
2435
2436         return rc;
2437 }
2438
2439 static void set_io_rings_size(struct ena_adapter *adapter,
2440                               int new_tx_size,
2441                               int new_rx_size)
2442 {
2443         int i;
2444
2445         for (i = 0; i < adapter->num_io_queues; i++) {
2446                 adapter->tx_ring[i].ring_size = new_tx_size;
2447                 adapter->rx_ring[i].ring_size = new_rx_size;
2448         }
2449 }
2450
2451 /* This function allows queue allocation to back off when the system is
2452  * low on memory. If there is not enough memory to allocate I/O queues,
2453  * the driver will try to allocate smaller queues.
2454  *
2455  * The backoff algorithm is as follows:
2456  *  1. Try to allocate the TX and RX queues.
2457  *  1.1. If successful, return success.
2458  *
2459  *  2. Halve the size of the larger of the RX and TX queues (or both if they are the same size).
2460  *
2461  *  3. If the TX or RX queue size drops below 256,
2462  *  3.1. return failure.
2463  *  4. Else,
2464  *  4.1. go back to step 1.
2465  */
2466 static int create_queues_with_size_backoff(struct ena_adapter *adapter)
2467 {
2468         int rc, cur_rx_ring_size, cur_tx_ring_size;
2469         int new_rx_ring_size, new_tx_ring_size;
2470
2471         /* The current queue sizes might be smaller than the requested
2472          * ones due to past queue allocation failures.
2473          */
2474         set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2475                           adapter->requested_rx_ring_size);
2476
2477         while (1) {
2478                 if (ena_xdp_present(adapter)) {
2479                         rc = ena_setup_and_create_all_xdp_queues(adapter);
2480
2481                         if (rc)
2482                                 goto err_setup_tx;
2483                 }
2484                 rc = ena_setup_tx_resources_in_range(adapter,
2485                                                      0,
2486                                                      adapter->num_io_queues);
2487                 if (rc)
2488                         goto err_setup_tx;
2489
2490                 rc = ena_create_io_tx_queues_in_range(adapter,
2491                                                       0,
2492                                                       adapter->num_io_queues);
2493                 if (rc)
2494                         goto err_create_tx_queues;
2495
2496                 rc = ena_setup_all_rx_resources(adapter);
2497                 if (rc)
2498                         goto err_setup_rx;
2499
2500                 rc = ena_create_all_io_rx_queues(adapter);
2501                 if (rc)
2502                         goto err_create_rx_queues;
2503
2504                 return 0;
2505
2506 err_create_rx_queues:
2507                 ena_free_all_io_rx_resources(adapter);
2508 err_setup_rx:
2509                 ena_destroy_all_tx_queues(adapter);
2510 err_create_tx_queues:
2511                 ena_free_all_io_tx_resources(adapter);
2512 err_setup_tx:
2513                 if (rc != -ENOMEM) {
2514                         netif_err(adapter, ifup, adapter->netdev,
2515                                   "Queue creation failed with error code %d\n",
2516                                   rc);
2517                         return rc;
2518                 }
2519
2520                 cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2521                 cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2522
2523                 netif_err(adapter, ifup, adapter->netdev,
2524                           "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2525                           cur_tx_ring_size, cur_rx_ring_size);
2526
2527                 new_tx_ring_size = cur_tx_ring_size;
2528                 new_rx_ring_size = cur_rx_ring_size;
2529
2530                 /* Decrease the size of the larger queue, or
2531                  * decrease both if they are the same size.
2532                  */
2533                 if (cur_rx_ring_size <= cur_tx_ring_size)
2534                         new_tx_ring_size = cur_tx_ring_size / 2;
2535                 if (cur_rx_ring_size >= cur_tx_ring_size)
2536                         new_rx_ring_size = cur_rx_ring_size / 2;
2537
2538                 if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2539                     new_rx_ring_size < ENA_MIN_RING_SIZE) {
2540                         netif_err(adapter, ifup, adapter->netdev,
2541                                   "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
2542                                   ENA_MIN_RING_SIZE);
2543                         return rc;
2544                 }
2545
2546                 netif_err(adapter, ifup, adapter->netdev,
2547                           "Retrying queue creation with sizes TX=%d, RX=%d\n",
2548                           new_tx_ring_size,
2549                           new_rx_ring_size);
2550
2551                 set_io_rings_size(adapter, new_tx_ring_size,
2552                                   new_rx_ring_size);
2553         }
2554 }
2555
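/* Bring up the data path: set up and request the I/O interrupts, initialize
 * NAPI, create the I/O queues (with size backoff under memory pressure), and
 * enable the rings and their interrupts.
 */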
2556 static int ena_up(struct ena_adapter *adapter)
2557 {
2558         int io_queue_count, rc, i;
2559
2560         netdev_dbg(adapter->netdev, "%s\n", __func__);
2561
2562         io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2563         ena_setup_io_intr(adapter);
2564
2565         /* The napi poll functions must be initialized before running
2566          * request_irq() to handle a rare condition where a pending
2567          * interrupt causes the ISR to fire immediately while the poll
2568          * function isn't set yet, which would lead to a null dereference
2569          */
2570         ena_init_napi_in_range(adapter, 0, io_queue_count);
2571
2572         rc = ena_request_io_irq(adapter);
2573         if (rc)
2574                 goto err_req_irq;
2575
2576         rc = create_queues_with_size_backoff(adapter);
2577         if (rc)
2578                 goto err_create_queues_with_backoff;
2579
2580         rc = ena_up_complete(adapter);
2581         if (rc)
2582                 goto err_up;
2583
2584         if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
2585                 netif_carrier_on(adapter->netdev);
2586
2587         u64_stats_update_begin(&adapter->syncp);
2588         adapter->dev_stats.interface_up++;
2589         u64_stats_update_end(&adapter->syncp);
2590
2591         set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2592
2593         /* Enable completion queues interrupt */
2594         for (i = 0; i < adapter->num_io_queues; i++)
2595                 ena_unmask_interrupt(&adapter->tx_ring[i],
2596                                      &adapter->rx_ring[i]);
2597
2598         /* schedule napi in case we had pending packets
2599          * from the last time napi was disabled
2600          */
2601         for (i = 0; i < io_queue_count; i++)
2602                 napi_schedule(&adapter->ena_napi[i].napi);
2603
2604         return rc;
2605
2606 err_up:
2607         ena_destroy_all_tx_queues(adapter);
2608         ena_free_all_io_tx_resources(adapter);
2609         ena_destroy_all_rx_queues(adapter);
2610         ena_free_all_io_rx_resources(adapter);
2611 err_create_queues_with_backoff:
2612         ena_free_io_irq(adapter);
2613 err_req_irq:
2614         ena_del_napi_in_range(adapter, 0, io_queue_count);
2615
2616         return rc;
2617 }
2618
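/* Bring the data path down: stop the TX queues and NAPI, destroy the I/O
 * queues, free the I/O IRQs and release all TX/RX buffers and resources.
 */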
2619 static void ena_down(struct ena_adapter *adapter)
2620 {
2621         int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2622
2623         netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
2624
2625         clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2626
2627         u64_stats_update_begin(&adapter->syncp);
2628         adapter->dev_stats.interface_down++;
2629         u64_stats_update_end(&adapter->syncp);
2630
2631         netif_carrier_off(adapter->netdev);
2632         netif_tx_disable(adapter->netdev);
2633
2634         /* After this point the napi handler won't enable the tx queue */
2635         ena_napi_disable_in_range(adapter, 0, io_queue_count);
2636
2637         /* After the queues are destroyed there won't be any new interrupts */
2638
2639         if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
2640                 int rc;
2641
2642                 rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2643                 if (rc)
2644                         dev_err(&adapter->pdev->dev, "Device reset failed\n");
2645                 /* stop submitting admin commands on a device that was reset */
2646                 ena_com_set_admin_running_state(adapter->ena_dev, false);
2647         }
2648
2649         ena_destroy_all_io_queues(adapter);
2650
2651         ena_disable_io_intr_sync(adapter);
2652         ena_free_io_irq(adapter);
2653         ena_del_napi_in_range(adapter, 0, io_queue_count);
2654
2655         ena_free_all_tx_bufs(adapter);
2656         ena_free_all_rx_bufs(adapter);
2657         ena_free_all_io_tx_resources(adapter);
2658         ena_free_all_io_rx_resources(adapter);
2659 }
2660
2661 /* ena_open - Called when a network interface is made active
2662  * @netdev: network interface device structure
2663  *
2664  * Returns 0 on success, negative value on failure
2665  *
2666  * The open entry point is called when a network interface is made
2667  * active by the system (IFF_UP).  At this point all resources needed
2668  * for transmit and receive operations are allocated, the interrupt
2669  * handler is registered with the OS, the watchdog timer is started,
2670  * and the stack is notified that the interface is ready.
2671  */
2672 static int ena_open(struct net_device *netdev)
2673 {
2674         struct ena_adapter *adapter = netdev_priv(netdev);
2675         int rc;
2676
2677         /* Notify the stack of the actual queue counts. */
2678         rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
2679         if (rc) {
2680                 netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
2681                 return rc;
2682         }
2683
2684         rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
2685         if (rc) {
2686                 netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
2687                 return rc;
2688         }
2689
2690         rc = ena_up(adapter);
2691         if (rc)
2692                 return rc;
2693
2694         return rc;
2695 }
2696
2697 /* ena_close - Disables a network interface
2698  * @netdev: network interface device structure
2699  *
2700  * Returns 0, this is not allowed to fail
2701  *
2702  * The close entry point is called when an interface is de-activated
2703  * by the OS.  The hardware is still under the drivers control, but
2704  * needs to be disabled.  A global MAC reset is issued to stop the
2705  * hardware, and all transmit and receive resources are freed.
2706  */
2707 static int ena_close(struct net_device *netdev)
2708 {
2709         struct ena_adapter *adapter = netdev_priv(netdev);
2710
2711         netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
2712
2713         if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
2714                 return 0;
2715
2716         if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2717                 ena_down(adapter);
2718
2719         /* Check the device status and issue a reset if needed */
2720         check_for_admin_com_state(adapter);
2721         if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2722                 netif_err(adapter, ifdown, adapter->netdev,
2723                           "Destroy failure, restarting device\n");
2724                 ena_dump_stats_to_dmesg(adapter);
2725                 /* rtnl lock already obtained in dev_ioctl() layer */
2726                 ena_destroy_device(adapter, false);
2727                 ena_restore_device(adapter);
2728         }
2729
2730         return 0;
2731 }
2732
2733 int ena_update_queue_sizes(struct ena_adapter *adapter,
2734                            u32 new_tx_size,
2735                            u32 new_rx_size)
2736 {
2737         bool dev_was_up;
2738
2739         dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2740         ena_close(adapter->netdev);
2741         adapter->requested_tx_ring_size = new_tx_size;
2742         adapter->requested_rx_ring_size = new_rx_size;
2743         ena_init_io_rings(adapter,
2744                           0,
2745                           adapter->xdp_num_queues +
2746                           adapter->num_io_queues);
2747         return dev_was_up ? ena_up(adapter) : 0;
2748 }
2749
2750 int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
2751 {
2752         struct ena_com_dev *ena_dev = adapter->ena_dev;
2753         int prev_channel_count;
2754         bool dev_was_up;
2755
2756         dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2757         ena_close(adapter->netdev);
2758         prev_channel_count = adapter->num_io_queues;
2759         adapter->num_io_queues = new_channel_count;
2760         if (ena_xdp_present(adapter) &&
2761             ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
2762                 adapter->xdp_first_ring = new_channel_count;
2763                 adapter->xdp_num_queues = new_channel_count;
2764                 if (prev_channel_count > new_channel_count)
2765                         ena_xdp_exchange_program_rx_in_range(adapter,
2766                                                              NULL,
2767                                                              new_channel_count,
2768                                                              prev_channel_count);
2769                 else
2770                         ena_xdp_exchange_program_rx_in_range(adapter,
2771                                                              adapter->xdp_bpf_prog,
2772                                                              prev_channel_count,
2773                                                              new_channel_count);
2774         }
2775
2776         /* We need to destroy the rss table so that the indirection
2777          * table will be reinitialized by ena_up()
2778          */
2779         ena_com_rss_destroy(ena_dev);
2780         ena_init_io_rings(adapter,
2781                           0,
2782                           adapter->xdp_num_queues +
2783                           adapter->num_io_queues);
2784         return dev_was_up ? ena_open(adapter->netdev) : 0;
2785 }
2786
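/* ena_tx_csum - fill the TX checksum/TSO metadata of the ena_com TX context
 * from the skb: L3/L4 protocol, header lengths and offsets, MSS and the
 * checksum offload enable flags.
 */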
2787 static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx,
2788                         struct sk_buff *skb,
2789                         bool disable_meta_caching)
2790 {
2791         u32 mss = skb_shinfo(skb)->gso_size;
2792         struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
2793         u8 l4_protocol = 0;
2794
2795         if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
2796                 ena_tx_ctx->l4_csum_enable = 1;
2797                 if (mss) {
2798                         ena_tx_ctx->tso_enable = 1;
2799                         ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
2800                         ena_tx_ctx->l4_csum_partial = 0;
2801                 } else {
2802                         ena_tx_ctx->tso_enable = 0;
2803                         ena_meta->l4_hdr_len = 0;
2804                         ena_tx_ctx->l4_csum_partial = 1;
2805                 }
2806
2807                 switch (ip_hdr(skb)->version) {
2808                 case IPVERSION:
2809                         ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2810                         if (ip_hdr(skb)->frag_off & htons(IP_DF))
2811                                 ena_tx_ctx->df = 1;
2812                         if (mss)
2813                                 ena_tx_ctx->l3_csum_enable = 1;
2814                         l4_protocol = ip_hdr(skb)->protocol;
2815                         break;
2816                 case 6:
2817                         ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2818                         l4_protocol = ipv6_hdr(skb)->nexthdr;
2819                         break;
2820                 default:
2821                         break;
2822                 }
2823
2824                 if (l4_protocol == IPPROTO_TCP)
2825                         ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2826                 else
2827                         ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2828
2829                 ena_meta->mss = mss;
2830                 ena_meta->l3_hdr_len = skb_network_header_len(skb);
2831                 ena_meta->l3_hdr_offset = skb_network_offset(skb);
2832                 ena_tx_ctx->meta_valid = 1;
2833         } else if (disable_meta_caching) {
2834                 memset(ena_meta, 0, sizeof(*ena_meta));
2835                 ena_tx_ctx->meta_valid = 1;
2836         } else {
2837                 ena_tx_ctx->meta_valid = 0;
2838         }
2839 }
2840
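/* Linearize the skb when it carries more fragments than the TX SGL can
 * describe (taking into account whether the header fits within
 * tx_max_header_size), and update the linearize statistics accordingly.
 */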
2841 static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
2842                                        struct sk_buff *skb)
2843 {
2844         int num_frags, header_len, rc;
2845
2846         num_frags = skb_shinfo(skb)->nr_frags;
2847         header_len = skb_headlen(skb);
2848
2849         if (num_frags < tx_ring->sgl_size)
2850                 return 0;
2851
2852         if ((num_frags == tx_ring->sgl_size) &&
2853             (header_len < tx_ring->tx_max_header_size))
2854                 return 0;
2855
2856         u64_stats_update_begin(&tx_ring->syncp);
2857         tx_ring->tx_stats.linearize++;
2858         u64_stats_update_end(&tx_ring->syncp);
2859
2860         rc = skb_linearize(skb);
2861         if (unlikely(rc)) {
2862                 u64_stats_update_begin(&tx_ring->syncp);
2863                 tx_ring->tx_stats.linearize_failed++;
2864                 u64_stats_update_end(&tx_ring->syncp);
2865         }
2866
2867         return rc;
2868 }
2869
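/* DMA map the skb linear part and fragments into tx_info->bufs. In LLQ mode
 * the first push_len bytes are copied to the push buffer and are not mapped;
 * 'delta' skips the part of the fragments already covered by the pushed header.
 */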
2870 static int ena_tx_map_skb(struct ena_ring *tx_ring,
2871                           struct ena_tx_buffer *tx_info,
2872                           struct sk_buff *skb,
2873                           void **push_hdr,
2874                           u16 *header_len)
2875 {
2876         struct ena_adapter *adapter = tx_ring->adapter;
2877         struct ena_com_buf *ena_buf;
2878         dma_addr_t dma;
2879         u32 skb_head_len, frag_len, last_frag;
2880         u16 push_len = 0;
2881         u16 delta = 0;
2882         int i = 0;
2883
2884         skb_head_len = skb_headlen(skb);
2885         tx_info->skb = skb;
2886         ena_buf = tx_info->bufs;
2887
2888         if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2889                 /* When the device is in LLQ mode, the driver copies
2890                  * the header into the device memory space.
2891                  * The ena_com layer assumes the header is in a linear
2892                  * memory space.
2893                  * This assumption might be wrong since part of the header
2894                  * can be in the fragmented buffers.
2895                  * Use skb_header_pointer() to make sure the header is in a
2896                  * linear memory space.
2897                  */
2898
2899                 push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
2900                 *push_hdr = skb_header_pointer(skb, 0, push_len,
2901                                                tx_ring->push_buf_intermediate_buf);
2902                 *header_len = push_len;
2903                 if (unlikely(skb->data != *push_hdr)) {
2904                         u64_stats_update_begin(&tx_ring->syncp);
2905                         tx_ring->tx_stats.llq_buffer_copy++;
2906                         u64_stats_update_end(&tx_ring->syncp);
2907
2908                         delta = push_len - skb_head_len;
2909                 }
2910         } else {
2911                 *push_hdr = NULL;
2912                 *header_len = min_t(u32, skb_head_len,
2913                                     tx_ring->tx_max_header_size);
2914         }
2915
2916         netif_dbg(adapter, tx_queued, adapter->netdev,
2917                   "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2918                   *push_hdr, push_len);
2919
2920         if (skb_head_len > push_len) {
2921                 dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2922                                      skb_head_len - push_len, DMA_TO_DEVICE);
2923                 if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2924                         goto error_report_dma_error;
2925
2926                 ena_buf->paddr = dma;
2927                 ena_buf->len = skb_head_len - push_len;
2928
2929                 ena_buf++;
2930                 tx_info->num_of_bufs++;
2931                 tx_info->map_linear_data = 1;
2932         } else {
2933                 tx_info->map_linear_data = 0;
2934         }
2935
2936         last_frag = skb_shinfo(skb)->nr_frags;
2937
2938         for (i = 0; i < last_frag; i++) {
2939                 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2940
2941                 frag_len = skb_frag_size(frag);
2942
2943                 if (unlikely(delta >= frag_len)) {
2944                         delta -= frag_len;
2945                         continue;
2946                 }
2947
2948                 dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
2949                                        frag_len - delta, DMA_TO_DEVICE);
2950                 if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2951                         goto error_report_dma_error;
2952
2953                 ena_buf->paddr = dma;
2954                 ena_buf->len = frag_len - delta;
2955                 ena_buf++;
2956                 tx_info->num_of_bufs++;
2957                 delta = 0;
2958         }
2959
2960         return 0;
2961
2962 error_report_dma_error:
2963         u64_stats_update_begin(&tx_ring->syncp);
2964         tx_ring->tx_stats.dma_mapping_err++;
2965         u64_stats_update_end(&tx_ring->syncp);
2966         netdev_warn(adapter->netdev, "failed to map skb\n");
2967
2968         tx_info->skb = NULL;
2969
2970         tx_info->num_of_bufs += i;
2971         ena_unmap_tx_buff(tx_ring, tx_info);
2972
2973         return -EINVAL;
2974 }
2975
2976 /* Called with netif_tx_lock. */
2977 static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
2978 {
2979         struct ena_adapter *adapter = netdev_priv(dev);
2980         struct ena_tx_buffer *tx_info;
2981         struct ena_com_tx_ctx ena_tx_ctx;
2982         struct ena_ring *tx_ring;
2983         struct netdev_queue *txq;
2984         void *push_hdr;
2985         u16 next_to_use, req_id, header_len;
2986         int qid, rc;
2987
2988         netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
2989         /* Determine which tx ring the skb will be placed on */
2990         qid = skb_get_queue_mapping(skb);
2991         tx_ring = &adapter->tx_ring[qid];
2992         txq = netdev_get_tx_queue(dev, qid);
2993
2994         rc = ena_check_and_linearize_skb(tx_ring, skb);
2995         if (unlikely(rc))
2996                 goto error_drop_packet;
2997
2998         skb_tx_timestamp(skb);
2999
3000         next_to_use = tx_ring->next_to_use;
3001         req_id = tx_ring->free_ids[next_to_use];
3002         tx_info = &tx_ring->tx_buffer_info[req_id];
3003         tx_info->num_of_bufs = 0;
3004
3005         WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
3006
3007         rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
3008         if (unlikely(rc))
3009                 goto error_drop_packet;
3010
3011         memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
3012         ena_tx_ctx.ena_bufs = tx_info->bufs;
3013         ena_tx_ctx.push_header = push_hdr;
3014         ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
3015         ena_tx_ctx.req_id = req_id;
3016         ena_tx_ctx.header_len = header_len;
3017
3018         /* set flags and meta data */
3019         ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);
3020
3021         rc = ena_xmit_common(dev,
3022                              tx_ring,
3023                              tx_info,
3024                              &ena_tx_ctx,
3025                              next_to_use,
3026                              skb->len);
3027         if (rc)
3028                 goto error_unmap_dma;
3029
3030         netdev_tx_sent_queue(txq, skb->len);
3031
3032         /* Stop the queue when no more space is available. The packet can
3033          * require up to sgl_size + 2 descriptors: one for the meta descriptor
3034          * and one for the header (if it is larger than tx_max_header_size).
3035          */
3036         if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3037                                                    tx_ring->sgl_size + 2))) {
3038                 netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
3039                           __func__, qid);
3040
3041                 netif_tx_stop_queue(txq);
3042                 u64_stats_update_begin(&tx_ring->syncp);
3043                 tx_ring->tx_stats.queue_stop++;
3044                 u64_stats_update_end(&tx_ring->syncp);
3045
3046                 /* There is a rare condition where this function decides to
3047                  * stop the queue but meanwhile clean_tx_irq updates
3048                  * next_to_completion and terminates.
3049                  * The queue will remain stopped forever.
3050                  * To solve this issue add a mb() to make sure that
3051                  * netif_tx_stop_queue() write is visible before checking if
3052                  * there is additional space in the queue.
3053                  */
3054                 smp_mb();
3055
3056                 if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
3057                                                  ENA_TX_WAKEUP_THRESH)) {
3058                         netif_tx_wake_queue(txq);
3059                         u64_stats_update_begin(&tx_ring->syncp);
3060                         tx_ring->tx_stats.queue_wakeup++;
3061                         u64_stats_update_end(&tx_ring->syncp);
3062                 }
3063         }
3064
3065         if (netif_xmit_stopped(txq) || !netdev_xmit_more()) {
3066                 /* trigger the dma engine. ena_com_write_sq_doorbell()
3067                  * has a mb
3068                  */
3069                 ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
3070                 u64_stats_update_begin(&tx_ring->syncp);
3071                 tx_ring->tx_stats.doorbells++;
3072                 u64_stats_update_end(&tx_ring->syncp);
3073         }
3074
3075         return NETDEV_TX_OK;
3076
3077 error_unmap_dma:
3078         ena_unmap_tx_buff(tx_ring, tx_info);
3079         tx_info->skb = NULL;
3080
3081 error_drop_packet:
3082         dev_kfree_skb(skb);
3083         return NETDEV_TX_OK;
3084 }
3085
3086 static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
3087                             struct net_device *sb_dev)
3088 {
3089         u16 qid;
3090         /* We suspect that this is good for in-kernel network services that
3091          * want to loop incoming skb rx to tx; with normal user-generated
3092          * traffic, most probably we will not get to this.
3093          */
3094         if (skb_rx_queue_recorded(skb))
3095                 qid = skb_get_rx_queue(skb);
3096         else
3097                 qid = netdev_pick_tx(dev, skb, NULL);
3098
3099         return qid;
3100 }
3101
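/* Allocate the host info structure and report the kernel/driver versions and
 * the driver supported features to the device via the host attributes command.
 */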
3102 static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
3103 {
3104         struct ena_admin_host_info *host_info;
3105         int rc;
3106
3107         /* Allocate only the host info */
3108         rc = ena_com_allocate_host_info(ena_dev);
3109         if (rc) {
3110                 pr_err("Cannot allocate host info\n");
3111                 return;
3112         }
3113
3114         host_info = ena_dev->host_attr.host_info;
3115
3116         host_info->bdf = (pdev->bus->number << 8) | pdev->devfn;
3117         host_info->os_type = ENA_ADMIN_OS_LINUX;
3118         host_info->kernel_ver = LINUX_VERSION_CODE;
3119         strlcpy(host_info->kernel_ver_str, utsname()->version,
3120                 sizeof(host_info->kernel_ver_str) - 1);
3121         host_info->os_dist = 0;
3122         strncpy(host_info->os_dist_str, utsname()->release,
3123                 sizeof(host_info->os_dist_str) - 1);
3124         host_info->driver_version =
3125                 (DRV_MODULE_GEN_MAJOR) |
3126                 (DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
3127                 (DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
3128                 ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
3129         host_info->num_cpus = num_online_cpus();
3130
3131         host_info->driver_supported_features =
3132                 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
3133                 ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK |
3134                 ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK |
3135                 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK;
3136
3137         rc = ena_com_set_host_attributes(ena_dev);
3138         if (rc) {
3139                 if (rc == -EOPNOTSUPP)
3140                         pr_warn("Cannot set host attributes\n");
3141                 else
3142                         pr_err("Cannot set host attributes\n");
3143
3144                 goto err;
3145         }
3146
3147         return;
3148
3149 err:
3150         ena_com_delete_host_info(ena_dev);
3151 }
3152
3153 static void ena_config_debug_area(struct ena_adapter *adapter)
3154 {
3155         u32 debug_area_size;
3156         int rc, ss_count;
3157
3158         ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
3159         if (ss_count <= 0) {
3160                 netif_err(adapter, drv, adapter->netdev,
3161                           "SS count is negative or zero\n");
3162                 return;
3163         }
3164
3165         /* allocate 32 bytes for each string and 64 bits for each value */
3166         debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
3167
3168         rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
3169         if (rc) {
3170                 pr_err("Cannot allocate debug area\n");
3171                 return;
3172         }
3173
3174         rc = ena_com_set_host_attributes(adapter->ena_dev);
3175         if (rc) {
3176                 if (rc == -EOPNOTSUPP)
3177                         netif_warn(adapter, drv, adapter->netdev,
3178                                    "Cannot set host attributes\n");
3179                 else
3180                         netif_err(adapter, drv, adapter->netdev,
3181                                   "Cannot set host attributes\n");
3182                 goto err;
3183         }
3184
3185         return;
3186 err:
3187         ena_com_delete_debug_area(adapter->ena_dev);
3188 }
3189
3190 static void ena_get_stats64(struct net_device *netdev,
3191                             struct rtnl_link_stats64 *stats)
3192 {
3193         struct ena_adapter *adapter = netdev_priv(netdev);
3194         struct ena_ring *rx_ring, *tx_ring;
3195         unsigned int start;
3196         u64 rx_drops;
3197         u64 tx_drops;
3198         int i;
3199
3200         if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3201                 return;
3202
3203         for (i = 0; i < adapter->num_io_queues; i++) {
3204                 u64 bytes, packets;
3205
3206                 tx_ring = &adapter->tx_ring[i];
3207
3208                 do {
3209                         start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
3210                         packets = tx_ring->tx_stats.cnt;
3211                         bytes = tx_ring->tx_stats.bytes;
3212                 } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
3213
3214                 stats->tx_packets += packets;
3215                 stats->tx_bytes += bytes;
3216
3217                 rx_ring = &adapter->rx_ring[i];
3218
3219                 do {
3220                         start = u64_stats_fetch_begin_irq(&rx_ring->syncp);
3221                         packets = rx_ring->rx_stats.cnt;
3222                         bytes = rx_ring->rx_stats.bytes;
3223                 } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start));
3224
3225                 stats->rx_packets += packets;
3226                 stats->rx_bytes += bytes;
3227         }
3228
3229         do {
3230                 start = u64_stats_fetch_begin_irq(&adapter->syncp);
3231                 rx_drops = adapter->dev_stats.rx_drops;
3232                 tx_drops = adapter->dev_stats.tx_drops;
3233         } while (u64_stats_fetch_retry_irq(&adapter->syncp, start));
3234
3235         stats->rx_dropped = rx_drops;
3236         stats->tx_dropped = tx_drops;
3237
3238         stats->multicast = 0;
3239         stats->collisions = 0;
3240
3241         stats->rx_length_errors = 0;
3242         stats->rx_crc_errors = 0;
3243         stats->rx_frame_errors = 0;
3244         stats->rx_fifo_errors = 0;
3245         stats->rx_missed_errors = 0;
3246         stats->tx_window_errors = 0;
3247
3248         stats->rx_errors = 0;
3249         stats->tx_errors = 0;
3250 }
3251
3252 static const struct net_device_ops ena_netdev_ops = {
3253         .ndo_open               = ena_open,
3254         .ndo_stop               = ena_close,
3255         .ndo_start_xmit         = ena_start_xmit,
3256         .ndo_select_queue       = ena_select_queue,
3257         .ndo_get_stats64        = ena_get_stats64,
3258         .ndo_tx_timeout         = ena_tx_timeout,
3259         .ndo_change_mtu         = ena_change_mtu,
3260         .ndo_set_mac_address    = NULL,
3261         .ndo_validate_addr      = eth_validate_addr,
3262         .ndo_bpf                = ena_xdp,
3263 };
3264
3265 static int ena_device_validate_params(struct ena_adapter *adapter,
3266                                       struct ena_com_dev_get_features_ctx *get_feat_ctx)
3267 {
3268         struct net_device *netdev = adapter->netdev;
3269         int rc;
3270
3271         rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
3272                               adapter->mac_addr);
3273         if (!rc) {
3274                 netif_err(adapter, drv, netdev,
3275                           "Error, mac addresses are different\n");
3276                 return -EINVAL;
3277         }
3278
3279         if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
3280                 netif_err(adapter, drv, netdev,
3281                           "Error, device max mtu is smaller than netdev MTU\n");
3282                 return -EINVAL;
3283         }
3284
3285         return 0;
3286 }
3287
3288 static void set_default_llq_configurations(struct ena_llq_configurations *llq_config)
3289 {
3290         llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
3291         llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
3292         llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
3293         llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
3294         llq_config->llq_ring_entry_size_value = 128;
3295 }
3296
3297 static int ena_set_queues_placement_policy(struct pci_dev *pdev,
3298                                            struct ena_com_dev *ena_dev,
3299                                            struct ena_admin_feature_llq_desc *llq,
3300                                            struct ena_llq_configurations *llq_default_configurations)
3301 {
3302         int rc;
3303         u32 llq_feature_mask;
3304
3305         llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3306         if (!(ena_dev->supported_features & llq_feature_mask)) {
3307                 dev_err(&pdev->dev,
3308                         "LLQ is not supported. Fallback to host mode policy.\n");
3309                 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3310                 return 0;
3311         }
3312
3313         rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3314         if (unlikely(rc)) {
3315                 dev_err(&pdev->dev,
3316                         "Failed to configure the device mode. Fallback to host mode policy.\n");
3317                 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3318         }
3319
3320         return 0;
3321 }
3322
3323 static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
3324                                int bars)
3325 {
3326         bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR));
3327
3328         if (!has_mem_bar) {
3329                 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
3330                         dev_err(&pdev->dev,
3331                                 "ENA device does not expose LLQ bar. Fallback to host mode policy.\n");
3332                         ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3333                 }
3334
3335                 return 0;
3336         }
3337
3338         ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3339                                            pci_resource_start(pdev, ENA_MEM_BAR),
3340                                            pci_resource_len(pdev, ENA_MEM_BAR));
3341
3342         if (!ena_dev->mem_bar)
3343                 return -EFAULT;
3344
3345         return 0;
3346 }
3347
3348 static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
3349                            struct ena_com_dev_get_features_ctx *get_feat_ctx,
3350                            bool *wd_state)
3351 {
3352         struct ena_llq_configurations llq_config;
3353         struct device *dev = &pdev->dev;
3354         bool readless_supported;
3355         u32 aenq_groups;
3356         int dma_width;
3357         int rc;
3358
3359         rc = ena_com_mmio_reg_read_request_init(ena_dev);
3360         if (rc) {
3361                 dev_err(dev, "failed to init mmio read less\n");
3362                 return rc;
3363         }
3364
3365         /* The PCIe configuration space revision id indicates whether mmio reg
3366          * read is disabled
3367          */
3368         readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
3369         ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3370
3371         rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3372         if (rc) {
3373                 dev_err(dev, "Can not reset device\n");
3374                 goto err_mmio_read_less;
3375         }
3376
3377         rc = ena_com_validate_version(ena_dev);
3378         if (rc) {
3379                 dev_err(dev, "device version is too low\n");
3380                 goto err_mmio_read_less;
3381         }
3382
3383         dma_width = ena_com_get_dma_width(ena_dev);
3384         if (dma_width < 0) {
3385                 dev_err(dev, "Invalid dma width value %d\n", dma_width);
3386                 rc = dma_width;
3387                 goto err_mmio_read_less;
3388         }
3389
3390         rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
3391         if (rc) {
3392                 dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc);
3393                 goto err_mmio_read_less;
3394         }
3395
3396         rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
3397         if (rc) {
3398                 dev_err(dev, "pci_set_consistent_dma_mask failed 0x%x\n",
3399                         rc);
3400                 goto err_mmio_read_less;
3401         }
3402
3403         /* ENA admin level init */
3404         rc = ena_com_admin_init(ena_dev, &aenq_handlers);
3405         if (rc) {
3406                 dev_err(dev,
3407                         "Can not initialize ena admin queue with device\n");
3408                 goto err_mmio_read_less;
3409         }
3410
3411         /* To enable the msix interrupts the driver needs to know the number
3412          * of queues. So the driver uses polling mode to retrieve this
3413          * information
3414          */
3415         ena_com_set_admin_polling_mode(ena_dev, true);
3416
3417         ena_config_host_info(ena_dev, pdev);
3418
3419         /* Get Device Attributes */
3420         rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3421         if (rc) {
3422                 dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
3423                 goto err_admin_init;
3424         }
3425
3426         /* Try to turn on all the available aenq groups */
3427         aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
3428                 BIT(ENA_ADMIN_FATAL_ERROR) |
3429                 BIT(ENA_ADMIN_WARNING) |
3430                 BIT(ENA_ADMIN_NOTIFICATION) |
3431                 BIT(ENA_ADMIN_KEEP_ALIVE);
3432
3433         aenq_groups &= get_feat_ctx->aenq.supported_groups;
3434
3435         rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3436         if (rc) {
3437                 dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
3438                 goto err_admin_init;
3439         }
3440
3441         *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3442
3443         set_default_llq_configurations(&llq_config);
3444
3445         rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
3446                                              &llq_config);
3447         if (rc) {
3448                 dev_err(&pdev->dev, "ena device init failed\n");
3449                 goto err_admin_init;
3450         }
3451
3452         return 0;
3453
3454 err_admin_init:
3455         ena_com_delete_host_info(ena_dev);
3456         ena_com_admin_destroy(ena_dev);
3457 err_mmio_read_less:
3458         ena_com_mmio_reg_read_request_destroy(ena_dev);
3459
3460         return rc;
3461 }
3462
3463 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
3464 {
3465         struct ena_com_dev *ena_dev = adapter->ena_dev;
3466         struct device *dev = &adapter->pdev->dev;
3467         int rc;
3468
3469         rc = ena_enable_msix(adapter);
3470         if (rc) {
3471                 dev_err(dev, "Can not reserve msix vectors\n");
3472                 return rc;
3473         }
3474
3475         ena_setup_mgmnt_intr(adapter);
3476
3477         rc = ena_request_mgmnt_irq(adapter);
3478         if (rc) {
3479                 dev_err(dev, "Can not setup management interrupts\n");
3480                 goto err_disable_msix;
3481         }
3482
3483         ena_com_set_admin_polling_mode(ena_dev, false);
3484
3485         ena_com_admin_aenq_enable(ena_dev);
3486
3487         return 0;
3488
3489 err_disable_msix:
3490         ena_disable_msix(adapter);
3491
3492         return rc;
3493 }
3494
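/* Tear the device down: stop the timer service, bring the interface down,
 * reset the device if needed and release the admin queue, MSI-X vectors and
 * readless register read resources.
 */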
3495 static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3496 {
3497         struct net_device *netdev = adapter->netdev;
3498         struct ena_com_dev *ena_dev = adapter->ena_dev;
3499         bool dev_up;
3500
3501         if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
3502                 return;
3503
3504         netif_carrier_off(netdev);
3505
3506         del_timer_sync(&adapter->timer_service);
3507
3508         dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
3509         adapter->dev_up_before_reset = dev_up;
3510         if (!graceful)
3511                 ena_com_set_admin_running_state(ena_dev, false);
3512
3513         if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3514                 ena_down(adapter);
3515
3516         /* Stop the device from sending AENQ events (if the reset flag is set
3517          * and the device is up, ena_down() has already reset the device).
3518          */
3519         if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
3520                 ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3521
3522         ena_free_mgmnt_irq(adapter);
3523
3524         ena_disable_msix(adapter);
3525
3526         ena_com_abort_admin_commands(ena_dev);
3527
3528         ena_com_wait_for_abort_completion(ena_dev);
3529
3530         ena_com_admin_destroy(ena_dev);
3531
3532         ena_com_mmio_reg_read_request_destroy(ena_dev);
3533
3534         /* return reset reason to default value */
3535         adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3536
3537         clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3538         clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3539 }
3540
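/* Bring the device back after a reset: redo the admin-level initialization,
 * validate that the device parameters (MAC, max MTU) still match, re-enable
 * MSI-X and, if the interface was up before the reset, bring it up again.
 */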
3541 static int ena_restore_device(struct ena_adapter *adapter)
3542 {
3543         struct ena_com_dev_get_features_ctx get_feat_ctx;
3544         struct ena_com_dev *ena_dev = adapter->ena_dev;
3545         struct pci_dev *pdev = adapter->pdev;
3546         bool wd_state;
3547         int rc;
3548
3549         set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3550         rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
3551         if (rc) {
3552                 dev_err(&pdev->dev, "Can not initialize device\n");
3553                 goto err;
3554         }
3555         adapter->wd_state = wd_state;
3556
3557         rc = ena_device_validate_params(adapter, &get_feat_ctx);
3558         if (rc) {
3559                 dev_err(&pdev->dev, "Validation of device parameters failed\n");
3560                 goto err_device_destroy;
3561         }
3562
3563         rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3564         if (rc) {
3565                 dev_err(&pdev->dev, "Enable MSI-X failed\n");
3566                 goto err_device_destroy;
3567         }
3568         /* If the interface was up before the reset, bring it up */
3569         if (adapter->dev_up_before_reset) {
3570                 rc = ena_up(adapter);
3571                 if (rc) {
3572                         dev_err(&pdev->dev, "Failed to create I/O queues\n");
3573                         goto err_disable_msix;
3574                 }
3575         }
3576
3577         set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3578
3579         clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3580         if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
3581                 netif_carrier_on(adapter->netdev);
3582
3583         mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3584         dev_err(&pdev->dev, "Device reset completed successfully\n");
3585         adapter->last_keep_alive_jiffies = jiffies;
3586
3587         return rc;
3588 err_disable_msix:
3589         ena_free_mgmnt_irq(adapter);
3590         ena_disable_msix(adapter);
3591 err_device_destroy:
3592         ena_com_abort_admin_commands(ena_dev);
3593         ena_com_wait_for_abort_completion(ena_dev);
3594         ena_com_admin_destroy(ena_dev);
3595         ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3596         ena_com_mmio_reg_read_request_destroy(ena_dev);
3597 err:
3598         clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3599         clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3600         dev_err(&pdev->dev,
3601                 "Reset attempt failed. Can not reset the device\n");
3602
3603         return rc;
3604 }
3605
3606 static void ena_fw_reset_device(struct work_struct *work)
3607 {
3608         struct ena_adapter *adapter =
3609                 container_of(work, struct ena_adapter, reset_task);
3610         struct pci_dev *pdev = adapter->pdev;
3611
3612         if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3613                 dev_err(&pdev->dev,
3614                         "device reset scheduled while reset bit is off\n");
3615                 return;
3616         }
3617         rtnl_lock();
3618         ena_destroy_device(adapter, false);
3619         ena_restore_device(adapter);
3620         rtnl_unlock();
3621 }
3622
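/* Detect a missing Rx interrupt: if the Rx completion queue is not empty but
 * the queue never received an interrupt, count the event and trigger a device
 * reset after ENA_MAX_NO_INTERRUPT_ITERATIONS consecutive detections.
 */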
3623 static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3624                                         struct ena_ring *rx_ring)
3625 {
3626         if (likely(rx_ring->first_interrupt))
3627                 return 0;
3628
3629         if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3630                 return 0;
3631
3632         rx_ring->no_interrupt_event_cnt++;
3633
3634         if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3635                 netif_err(adapter, rx_err, adapter->netdev,
3636                           "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
3637                           rx_ring->qid);
3638                 adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
3639                 smp_mb__before_atomic();
3640                 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3641                 return -EIO;
3642         }
3643
3644         return 0;
3645 }
3646
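/* Scan the Tx ring for packets whose completion is overdue. A missing
 * interrupt on the queue or too many missed completions triggers a device
 * reset; otherwise the number of overdue packets is reported in the tx stats.
 */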
3647 static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3648                                           struct ena_ring *tx_ring)
3649 {
3650         struct ena_tx_buffer *tx_buf;
3651         unsigned long last_jiffies;
3652         u32 missed_tx = 0;
3653         int i, rc = 0;
3654
3655         for (i = 0; i < tx_ring->ring_size; i++) {
3656                 tx_buf = &tx_ring->tx_buffer_info[i];
3657                 last_jiffies = tx_buf->last_jiffies;
3658
3659                 if (last_jiffies == 0)
3660                         /* no pending Tx at this location */
3661                         continue;
3662
3663                 if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies +
3664                              2 * adapter->missing_tx_completion_to))) {
3665                         /* If after the grace period the interrupt is still not
3666                          * received, we schedule a reset
3667                          */
3668                         netif_err(adapter, tx_err, adapter->netdev,
3669                                   "Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
3670                                   tx_ring->qid);
3671                         adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
3672                         smp_mb__before_atomic();
3673                         set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3674                         return -EIO;
3675                 }
3676
3677                 if (unlikely(time_is_before_jiffies(last_jiffies +
3678                                 adapter->missing_tx_completion_to))) {
3679                         if (!tx_buf->print_once)
3680                                 netif_notice(adapter, tx_err, adapter->netdev,
3681                                              "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
3682                                              tx_ring->qid, i);
3683
3684                         tx_buf->print_once = 1;
3685                         missed_tx++;
3686                 }
3687         }
3688
3689         if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
3690                 netif_err(adapter, tx_err, adapter->netdev,
3691                           "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
3692                           missed_tx,
3693                           adapter->missing_tx_completion_threshold);
3694                 adapter->reset_reason =
3695                         ENA_REGS_RESET_MISS_TX_CMPL;
3696                 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3697                 rc = -EIO;
3698         }
3699
3700         u64_stats_update_begin(&tx_ring->syncp);
3701         tx_ring->tx_stats.missed_tx = missed_tx;
3702         u64_stats_update_end(&tx_ring->syncp);
3703
3704         return rc;
3705 }
3706
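/* Called from the timer service. Checks up to ENA_MONITORED_TX_QUEUES queues
 * per invocation, resuming from where the previous invocation stopped, for
 * missing Tx completions and missing Rx interrupts.
 */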
3707 static void check_for_missing_completions(struct ena_adapter *adapter)
3708 {
3709         struct ena_ring *tx_ring;
3710         struct ena_ring *rx_ring;
3711         int i, budget, rc;
3712         int io_queue_count;
3713
3714         io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
3715         /* Make sure the driver doesn't turn the device off in another process */
3716         smp_rmb();
3717
3718         if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3719                 return;
3720
3721         if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3722                 return;
3723
3724         if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
3725                 return;
3726
3727         budget = ENA_MONITORED_TX_QUEUES;
3728
3729         for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
3730                 tx_ring = &adapter->tx_ring[i];
3731                 rx_ring = &adapter->rx_ring[i];
3732
3733                 rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3734                 if (unlikely(rc))
3735                         return;
3736
3737                 rc =  !ENA_IS_XDP_INDEX(adapter, i) ?
3738                         check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
3739                 if (unlikely(rc))
3740                         return;
3741
3742                 budget--;
3743                 if (!budget)
3744                         break;
3745         }
3746
3747         adapter->last_monitored_tx_qid = i % io_queue_count;
3748 }
3749
3750 /* trigger napi schedule after 2 consecutive detections */
3751 #define EMPTY_RX_REFILL 2
3752 /* For the rare case where the device runs out of Rx descriptors and the
3753  * napi handler failed to refill new Rx descriptors (due to a lack of memory
3754  * for example).
3755  * This case will lead to a deadlock:
3756  * The device won't send interrupts since all the new Rx packets will be dropped
3757  * The napi handler won't allocate new Rx descriptors so the device won't be
3758  * able to receive new packets.
3759  *
3760  * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
3761  * It is recommended to have at least 512MB, with a minimum of 128MB for a
3762  * constrained environment.
3763  *
3764  * When such a situation is detected - Reschedule napi
3765  */
3766 static void check_for_empty_rx_ring(struct ena_adapter *adapter)
3767 {
3768         struct ena_ring *rx_ring;
3769         int i, refill_required;
3770
3771         if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3772                 return;
3773
3774         if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3775                 return;
3776
3777         for (i = 0; i < adapter->num_io_queues; i++) {
3778                 rx_ring = &adapter->rx_ring[i];
3779
3780                 refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
3781                 if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3782                         rx_ring->empty_rx_queue++;
3783
3784                         if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3785                                 u64_stats_update_begin(&rx_ring->syncp);
3786                                 rx_ring->rx_stats.empty_rx_ring++;
3787                                 u64_stats_update_end(&rx_ring->syncp);
3788
3789                                 netif_err(adapter, drv, adapter->netdev,
3790                                           "trigger refill for ring %d\n", i);
3791
3792                                 napi_schedule(rx_ring->napi);
3793                                 rx_ring->empty_rx_queue = 0;
3794                         }
3795                 } else {
3796                         rx_ring->empty_rx_queue = 0;
3797                 }
3798         }
3799 }
3800
3801 /* Check for keep alive expiration */
3802 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3803 {
3804         unsigned long keep_alive_expired;
3805
3806         if (!adapter->wd_state)
3807                 return;
3808
3809         if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3810                 return;
3811
3812         keep_alive_expired = adapter->last_keep_alive_jiffies +
3813                              adapter->keep_alive_timeout;
3814         if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
3815                 netif_err(adapter, drv, adapter->netdev,
3816                           "Keep alive watchdog timeout.\n");
3817                 u64_stats_update_begin(&adapter->syncp);
3818                 adapter->dev_stats.wd_expired++;
3819                 u64_stats_update_end(&adapter->syncp);
3820                 adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
3821                 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3822         }
3823 }
3824
3825 static void check_for_admin_com_state(struct ena_adapter *adapter)
3826 {
3827         if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
3828                 netif_err(adapter, drv, adapter->netdev,
3829                           "ENA admin queue is not in running state!\n");
3830                 u64_stats_update_begin(&adapter->syncp);
3831                 adapter->dev_stats.admin_q_pause++;
3832                 u64_stats_update_end(&adapter->syncp);
3833                 adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
3834                 set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3835         }
3836 }
3837
3838 static void ena_update_hints(struct ena_adapter *adapter,
3839                              struct ena_admin_ena_hw_hints *hints)
3840 {
3841         struct net_device *netdev = adapter->netdev;
3842
3843         if (hints->admin_completion_tx_timeout)
3844                 adapter->ena_dev->admin_queue.completion_timeout =
3845                         hints->admin_completion_tx_timeout * 1000;
3846
3847         if (hints->mmio_read_timeout)
3848                 /* convert to usec */
3849                 adapter->ena_dev->mmio_read.reg_read_to =
3850                         hints->mmio_read_timeout * 1000;
3851
3852         if (hints->missed_tx_completion_count_threshold_to_reset)
3853                 adapter->missing_tx_completion_threshold =
3854                         hints->missed_tx_completion_count_threshold_to_reset;
3855
3856         if (hints->missing_tx_completion_timeout) {
3857                 if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3858                         adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
3859                 else
3860                         adapter->missing_tx_completion_to =
3861                                 msecs_to_jiffies(hints->missing_tx_completion_timeout);
3862         }
3863
3864         if (hints->netdev_wd_timeout)
3865                 netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
3866
3867         if (hints->driver_watchdog_timeout) {
3868                 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3869                         adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3870                 else
3871                         adapter->keep_alive_timeout =
3872                                 msecs_to_jiffies(hints->driver_watchdog_timeout);
3873         }
3874 }
3875
3876 static void ena_update_host_info(struct ena_admin_host_info *host_info,
3877                                  struct net_device *netdev)
3878 {
3879         host_info->supported_network_features[0] =
3880                 netdev->features & GENMASK_ULL(31, 0);
3881         host_info->supported_network_features[1] =
3882                 (netdev->features & GENMASK_ULL(63, 32)) >> 32;
3883 }
3884
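/* Periodic (1 Hz) watchdog: checks keep-alive, admin queue state, missing
 * completions and empty Rx rings, refreshes the debug area and host info,
 * and queues the reset task when a reset was triggered.
 */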
3885 static void ena_timer_service(struct timer_list *t)
3886 {
3887         struct ena_adapter *adapter = from_timer(adapter, t, timer_service);
3888         u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
3889         struct ena_admin_host_info *host_info =
3890                 adapter->ena_dev->host_attr.host_info;
3891
3892         check_for_missing_keep_alive(adapter);
3893
3894         check_for_admin_com_state(adapter);
3895
3896         check_for_missing_completions(adapter);
3897
3898         check_for_empty_rx_ring(adapter);
3899
3900         if (debug_area)
3901                 ena_dump_stats_to_buf(adapter, debug_area);
3902
3903         if (host_info)
3904                 ena_update_host_info(host_info, adapter->netdev);
3905
3906         if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3907                 netif_err(adapter, drv, adapter->netdev,
3908                           "Trigger reset is on\n");
3909                 ena_dump_stats_to_dmesg(adapter);
3910                 queue_work(ena_wq, &adapter->reset_task);
3911                 return;
3912         }
3913
3914         /* Reset the timer */
3915         mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3916 }
3917
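/* Derive the maximum number of IO queues from the device capabilities
 * (SQ/CQ counts, LLQ limits), the number of online CPUs and the available
 * MSI-X vectors (one vector is reserved for management).
 */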
3918 static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
3919                                      struct ena_com_dev *ena_dev,
3920                                      struct ena_com_dev_get_features_ctx *get_feat_ctx)
3921 {
3922         u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
3923
3924         if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3925                 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3926                         &get_feat_ctx->max_queue_ext.max_queue_ext;
3927                 io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
3928                                   max_queue_ext->max_rx_cq_num);
3929
3930                 io_tx_sq_num = max_queue_ext->max_tx_sq_num;
3931                 io_tx_cq_num = max_queue_ext->max_tx_cq_num;
3932         } else {
3933                 struct ena_admin_queue_feature_desc *max_queues =
3934                         &get_feat_ctx->max_queues;
3935                 io_tx_sq_num = max_queues->max_sq_num;
3936                 io_tx_cq_num = max_queues->max_cq_num;
3937                 io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
3938         }
3939
3940         /* In case of LLQ use the llq fields for the tx SQ/CQ */
3941         if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3942                 io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
3943
3944         max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
3945         max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
3946         max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
3947         max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
3948         /* 1 IRQ for mgmnt and 1 IRQ for each IO direction */
3949         max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
3950         if (unlikely(!max_num_io_queues)) {
3951                 dev_err(&pdev->dev, "The device doesn't have io queues\n");
3952                 return -EFAULT;
3953         }
3954
3955         return max_num_io_queues;
3956 }
3957
3958 static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
3959                                  struct net_device *netdev)
3960 {
3961         netdev_features_t dev_features = 0;
3962
3963         /* Set offload features */
3964         if (feat->offload.tx &
3965                 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
3966                 dev_features |= NETIF_F_IP_CSUM;
3967
3968         if (feat->offload.tx &
3969                 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
3970                 dev_features |= NETIF_F_IPV6_CSUM;
3971
3972         if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
3973                 dev_features |= NETIF_F_TSO;
3974
3975         if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
3976                 dev_features |= NETIF_F_TSO6;
3977
3978         if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
3979                 dev_features |= NETIF_F_TSO_ECN;
3980
3981         if (feat->offload.rx_supported &
3982                 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
3983                 dev_features |= NETIF_F_RXCSUM;
3984
3985         if (feat->offload.rx_supported &
3986                 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
3987                 dev_features |= NETIF_F_RXCSUM;
3988
3989         netdev->features =
3990                 dev_features |
3991                 NETIF_F_SG |
3992                 NETIF_F_RXHASH |
3993                 NETIF_F_HIGHDMA;
3994
3995         netdev->hw_features |= netdev->features;
3996         netdev->vlan_features |= netdev->features;
3997 }
3998
3999 static void ena_set_conf_feat_params(struct ena_adapter *adapter,
4000                                      struct ena_com_dev_get_features_ctx *feat)
4001 {
4002         struct net_device *netdev = adapter->netdev;
4003
4004         /* Copy mac address */
4005         if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
4006                 eth_hw_addr_random(netdev);
4007                 ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
4008         } else {
4009                 ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
4010                 ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
4011         }
4012
4013         /* Set offload features */
4014         ena_set_dev_offloads(feat, netdev);
4015
4016         adapter->max_mtu = feat->dev_attr.max_mtu;
4017         netdev->max_mtu = adapter->max_mtu;
4018         netdev->min_mtu = ENA_MIN_MTU;
4019 }
4020
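/* Initialize RSS with default settings: a round-robin indirection table over
 * the IO queues, a Toeplitz hash function and the default hash control.
 */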
4021 static int ena_rss_init_default(struct ena_adapter *adapter)
4022 {
4023         struct ena_com_dev *ena_dev = adapter->ena_dev;
4024         struct device *dev = &adapter->pdev->dev;
4025         int rc, i;
4026         u32 val;
4027
4028         rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
4029         if (unlikely(rc)) {
4030                 dev_err(dev, "Cannot init indirect table\n");
4031                 goto err_rss_init;
4032         }
4033
4034         for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
4035                 val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
4036                 rc = ena_com_indirect_table_fill_entry(ena_dev, i,
4037                                                        ENA_IO_RXQ_IDX(val));
4038                 if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4039                         dev_err(dev, "Cannot fill indirect table\n");
4040                         goto err_fill_indir;
4041                 }
4042         }
4043
4044         rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
4045                                         ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
4046         if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4047                 dev_err(dev, "Cannot fill hash function\n");
4048                 goto err_fill_indir;
4049         }
4050
4051         rc = ena_com_set_default_hash_ctrl(ena_dev);
4052         if (unlikely(rc && (rc != -EOPNOTSUPP))) {
4053                 dev_err(dev, "Cannot fill hash control\n");
4054                 goto err_fill_indir;
4055         }
4056
4057         return 0;
4058
4059 err_fill_indir:
4060         ena_com_rss_destroy(ena_dev);
4061 err_rss_init:
4062
4063         return rc;
4064 }
4065
4066 static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
4067 {
4068         int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
4069
4070         pci_release_selected_regions(pdev, release_bars);
4071 }
4072
4073
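/* Compute the Tx/Rx ring sizes: start from ENA_DEFAULT_RING_SIZE, clamp to
 * the device limits (and the LLQ depth when LLQ is used) and round down to a
 * power of two.
 */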
4074 static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
4075 {
4076         struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
4077         struct ena_com_dev *ena_dev = ctx->ena_dev;
4078         u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
4079         u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
4080         u32 max_tx_queue_size;
4081         u32 max_rx_queue_size;
4082
4083         if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
4084                 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
4085                         &ctx->get_feat_ctx->max_queue_ext.max_queue_ext;
4086                 max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
4087                                           max_queue_ext->max_rx_sq_depth);
4088                 max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
4089
4090                 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4091                         max_tx_queue_size = min_t(u32, max_tx_queue_size,
4092                                                   llq->max_llq_depth);
4093                 else
4094                         max_tx_queue_size = min_t(u32, max_tx_queue_size,
4095                                                   max_queue_ext->max_tx_sq_depth);
4096
4097                 ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4098                                              max_queue_ext->max_per_packet_tx_descs);
4099                 ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4100                                              max_queue_ext->max_per_packet_rx_descs);
4101         } else {
4102                 struct ena_admin_queue_feature_desc *max_queues =
4103                         &ctx->get_feat_ctx->max_queues;
4104                 max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
4105                                           max_queues->max_sq_depth);
4106                 max_tx_queue_size = max_queues->max_cq_depth;
4107
4108                 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4109                         max_tx_queue_size = min_t(u32, max_tx_queue_size,
4110                                                   llq->max_llq_depth);
4111                 else
4112                         max_tx_queue_size = min_t(u32, max_tx_queue_size,
4113                                                   max_queues->max_sq_depth);
4114
4115                 ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4116                                              max_queues->max_packet_tx_descs);
4117                 ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
4118                                              max_queues->max_packet_rx_descs);
4119         }
4120
4121         max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
4122         max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
4123
4124         tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
4125                                   max_tx_queue_size);
4126         rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
4127                                   max_rx_queue_size);
4128
4129         tx_queue_size = rounddown_pow_of_two(tx_queue_size);
4130         rx_queue_size = rounddown_pow_of_two(rx_queue_size);
4131
4132         ctx->max_tx_queue_size = max_tx_queue_size;
4133         ctx->max_rx_queue_size = max_rx_queue_size;
4134         ctx->tx_queue_size = tx_queue_size;
4135         ctx->rx_queue_size = rx_queue_size;
4136
4137         return 0;
4138 }
4139
4140 /* ena_probe - Device Initialization Routine
4141  * @pdev: PCI device information struct
4142  * @ent: entry in ena_pci_tbl
4143  *
4144  * Returns 0 on success, negative on failure
4145  *
4146  * ena_probe initializes an adapter identified by a pci_dev structure.
4147  * The OS initialization, configuring of the adapter private structure,
4148  * and a hardware reset occur.
4149  */
4150 static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
4151 {
4152         struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
4153         struct ena_com_dev_get_features_ctx get_feat_ctx;
4154         struct ena_com_dev *ena_dev = NULL;
4155         struct ena_adapter *adapter;
4156         struct net_device *netdev;
4157         static int adapters_found;
4158         u32 max_num_io_queues;
4159         char *queue_type_str;
4160         bool wd_state;
4161         int bars, rc;
4162
4163         dev_dbg(&pdev->dev, "%s\n", __func__);
4164
4165         rc = pci_enable_device_mem(pdev);
4166         if (rc) {
4167                 dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
4168                 return rc;
4169         }
4170
4171         pci_set_master(pdev);
4172
4173         ena_dev = vzalloc(sizeof(*ena_dev));
4174         if (!ena_dev) {
4175                 rc = -ENOMEM;
4176                 goto err_disable_device;
4177         }
4178
4179         bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
4180         rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
4181         if (rc) {
4182                 dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
4183                         rc);
4184                 goto err_free_ena_dev;
4185         }
4186
4187         ena_dev->reg_bar = devm_ioremap(&pdev->dev,
4188                                         pci_resource_start(pdev, ENA_REG_BAR),
4189                                         pci_resource_len(pdev, ENA_REG_BAR));
4190         if (!ena_dev->reg_bar) {
4191                 dev_err(&pdev->dev, "failed to remap regs bar\n");
4192                 rc = -EFAULT;
4193                 goto err_free_region;
4194         }
4195
4196         ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
4197
4198         ena_dev->dmadev = &pdev->dev;
4199
4200         rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
4201         if (rc) {
4202                 dev_err(&pdev->dev, "ena device init failed\n");
4203                 if (rc == -ETIME)
4204                         rc = -EPROBE_DEFER;
4205                 goto err_free_region;
4206         }
4207
4208         rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
4209         if (rc) {
4210                 dev_err(&pdev->dev, "ena llq bar mapping failed\n");
4211                 goto err_free_ena_dev;
4212         }
4213
4214         calc_queue_ctx.ena_dev = ena_dev;
4215         calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
4216         calc_queue_ctx.pdev = pdev;
4217
4218         /* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
4219          * Updated during device initialization with the real granularity
4220          */
4221         ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
4222         ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
4223         ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
4224         max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
4225         rc = ena_calc_io_queue_size(&calc_queue_ctx);
4226         if (rc || !max_num_io_queues) {
4227                 rc = -EFAULT;
4228                 goto err_device_destroy;
4229         }
4230
4231         /* dev zeroed in init_etherdev */
4232         netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), max_num_io_queues);
4233         if (!netdev) {
4234                 dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
4235                 rc = -ENOMEM;
4236                 goto err_device_destroy;
4237         }
4238
4239         SET_NETDEV_DEV(netdev, &pdev->dev);
4240
4241         adapter = netdev_priv(netdev);
4242         pci_set_drvdata(pdev, adapter);
4243
4244         adapter->ena_dev = ena_dev;
4245         adapter->netdev = netdev;
4246         adapter->pdev = pdev;
4247
4248         ena_set_conf_feat_params(adapter, &get_feat_ctx);
4249
4250         adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
4251         adapter->reset_reason = ENA_REGS_RESET_NORMAL;
4252
4253         adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size;
4254         adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size;
4255         adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size;
4256         adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size;
4257         adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size;
4258         adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size;
4259
4260         adapter->num_io_queues = max_num_io_queues;
4261         adapter->max_num_io_queues = max_num_io_queues;
4262         adapter->last_monitored_tx_qid = 0;
4263
4264         adapter->xdp_first_ring = 0;
4265         adapter->xdp_num_queues = 0;
4266
4267         adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
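        /* Meta-descriptor caching can only be disabled when the TX queues
         * use the device placement policy (LLQ) and the device advertises
         * support for it.
         */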
4268         if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4269                 adapter->disable_meta_caching =
4270                         !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
4271                            BIT(ENA_ADMIN_DISABLE_META_CACHING));
4272
4273         adapter->wd_state = wd_state;
4274
4275         snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
4276
4277         rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
4278         if (rc) {
4279                 dev_err(&pdev->dev,
4280                         "Failed to query interrupt moderation feature\n");
4281                 goto err_netdev_destroy;
4282         }
4283         ena_init_io_rings(adapter,
4284                           0,
4285                           adapter->xdp_num_queues +
4286                           adapter->num_io_queues);
4287
4288         netdev->netdev_ops = &ena_netdev_ops;
4289         netdev->watchdog_timeo = TX_TIMEOUT;
4290         ena_set_ethtool_ops(netdev);
4291
4292         netdev->priv_flags |= IFF_UNICAST_FLT;
4293
4294         u64_stats_init(&adapter->syncp);
4295
4296         rc = ena_enable_msix_and_set_admin_interrupts(adapter);
4297         if (rc) {
4298                 dev_err(&pdev->dev,
4299                         "Failed to enable and set the admin interrupts\n");
4300                 goto err_worker_destroy;
4301         }
4302         rc = ena_rss_init_default(adapter);
4303         if (rc && (rc != -EOPNOTSUPP)) {
4304                 dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
4305                 goto err_free_msix;
4306         }
4307
4308         ena_config_debug_area(adapter);
4309
4310         memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
4311
4312         netif_carrier_off(netdev);
4313
4314         rc = register_netdev(netdev);
4315         if (rc) {
4316                 dev_err(&pdev->dev, "Cannot register net device\n");
4317                 goto err_rss;
4318         }
4319
4320         INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
4321
4322         adapter->last_keep_alive_jiffies = jiffies;
4323         adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
4324         adapter->missing_tx_completion_to = TX_TIMEOUT;
4325         adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
4326
4327         ena_update_hints(adapter, &get_feat_ctx.hw_hints);
4328
4329         timer_setup(&adapter->timer_service, ena_timer_service, 0);
4330         mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
4331
4332         if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
4333                 queue_type_str = "Regular";
4334         else
4335                 queue_type_str = "Low Latency";
4336
4337         dev_info(&pdev->dev,
4338                  "%s found at mem %lx, mac addr %pM, Placement policy: %s\n",
4339                  DEVICE_NAME, (long)pci_resource_start(pdev, 0),
4340                  netdev->dev_addr, queue_type_str);
4341
4342         set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
4343
4344         adapters_found++;
4345
4346         return 0;
4347
4348 err_rss:
4349         ena_com_delete_debug_area(ena_dev);
4350         ena_com_rss_destroy(ena_dev);
4351 err_free_msix:
4352         ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
4353         /* stop submitting admin commands on a device that was reset */
4354         ena_com_set_admin_running_state(ena_dev, false);
4355         ena_free_mgmnt_irq(adapter);
4356         ena_disable_msix(adapter);
4357 err_worker_destroy:
4358         del_timer(&adapter->timer_service);
4359 err_netdev_destroy:
4360         free_netdev(netdev);
4361 err_device_destroy:
4362         ena_com_delete_host_info(ena_dev);
4363         ena_com_admin_destroy(ena_dev);
4364 err_free_region:
4365         ena_release_bars(ena_dev, pdev);
4366 err_free_ena_dev:
4367         vfree(ena_dev);
4368 err_disable_device:
4369         pci_disable_device(pdev);
4370         return rc;
4371 }
4372
4373 /*****************************************************************************/
4374
4375 /* __ena_shutoff - Helper used in both PCI remove/shutdown routines
4376  * @pdev: PCI device information struct
4377  * @shutdown: Is it a shutdown operation? If false, means it is a removal
4378  *
4379  * __ena_shutoff is a helper routine that does the real work on shutdown and
4380  * removal paths; the difference between those paths is with regards to whether
4381  * dettach or unregister the netdevice.
4382  */
4383 static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
4384 {
4385         struct ena_adapter *adapter = pci_get_drvdata(pdev);
4386         struct ena_com_dev *ena_dev;
4387         struct net_device *netdev;
4388
4389         ena_dev = adapter->ena_dev;
4390         netdev = adapter->netdev;
4391
4392 #ifdef CONFIG_RFS_ACCEL
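        /* Release the aRFS CPU reverse-mapping table, if one was allocated */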
4393         if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
4394                 free_irq_cpu_rmap(netdev->rx_cpu_rmap);
4395                 netdev->rx_cpu_rmap = NULL;
4396         }
4397 #endif /* CONFIG_RFS_ACCEL */
4398         del_timer_sync(&adapter->timer_service);
4399
4400         cancel_work_sync(&adapter->reset_task);
4401
4402         rtnl_lock(); /* lock released inside the below if-else block */
4403         adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
4404         ena_destroy_device(adapter, true);
4405         if (shutdown) {
4406                 netif_device_detach(netdev);
4407                 dev_close(netdev);
4408                 rtnl_unlock();
4409         } else {
4410                 rtnl_unlock();
4411                 unregister_netdev(netdev);
4412                 free_netdev(netdev);
4413         }
4414
4415         ena_com_rss_destroy(ena_dev);
4416
4417         ena_com_delete_debug_area(ena_dev);
4418
4419         ena_com_delete_host_info(ena_dev);
4420
4421         ena_release_bars(ena_dev, pdev);
4422
4423         pci_disable_device(pdev);
4424
4425         vfree(ena_dev);
4426 }
4427
4428 /* ena_remove - Device Removal Routine
4429  * @pdev: PCI device information struct
4430  *
4431  * ena_remove is called by the PCI subsystem to alert the driver
4432  * that it should release a PCI device.
4433  */
4434
4435 static void ena_remove(struct pci_dev *pdev)
4436 {
4437         __ena_shutoff(pdev, false);
4438 }
4439
4440 /* ena_shutdown - Device Shutdown Routine
4441  * @pdev: PCI device information struct
4442  *
4443  * ena_shutdown is called by the PCI subsystem to alert the driver that
4444  * a shutdown/reboot (or kexec) is happening and device must be disabled.
4445  */
4446
4447 static void ena_shutdown(struct pci_dev *pdev)
4448 {
4449         __ena_shutoff(pdev, true);
4450 }
4451
4452 /* ena_suspend - PM suspend callback
4453  * @dev_d: Device information struct
4454  */
4455 static int __maybe_unused ena_suspend(struct device *dev_d)
4456 {
4457         struct pci_dev *pdev = to_pci_dev(dev_d);
4458         struct ena_adapter *adapter = pci_get_drvdata(pdev);
4459
4460         u64_stats_update_begin(&adapter->syncp);
4461         adapter->dev_stats.suspend++;
4462         u64_stats_update_end(&adapter->syncp);
4463
4464         rtnl_lock();
4465         if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
4466                 dev_err(&pdev->dev,
4467                         "ignoring device reset request as the device is being suspended\n");
4468                 clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
4469         }
4470         ena_destroy_device(adapter, true);
4471         rtnl_unlock();
4472         return 0;
4473 }
4474
4475 /* ena_resume - PM resume callback
4476  * @dev_d: Device information struct
4477  */
4478 static int __maybe_unused ena_resume(struct device *dev_d)
4479 {
4480         struct ena_adapter *adapter = dev_get_drvdata(dev_d);
4481         int rc;
4482
4483         u64_stats_update_begin(&adapter->syncp);
4484         adapter->dev_stats.resume++;
4485         u64_stats_update_end(&adapter->syncp);
4486
4487         rtnl_lock();
4488         rc = ena_restore_device(adapter);
4489         rtnl_unlock();
4490         return rc;
4491 }
4492
4493 static SIMPLE_DEV_PM_OPS(ena_pm_ops, ena_suspend, ena_resume);
4494
4495 static struct pci_driver ena_pci_driver = {
4496         .name           = DRV_MODULE_NAME,
4497         .id_table       = ena_pci_tbl,
4498         .probe          = ena_probe,
4499         .remove         = ena_remove,
4500         .shutdown       = ena_shutdown,
4501         .driver.pm      = &ena_pm_ops,
4502         .sriov_configure = pci_sriov_configure_simple,
4503 };
4504
4505 static int __init ena_init(void)
4506 {
4507         ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
4508         if (!ena_wq) {
4509                 pr_err("Failed to create workqueue\n");
4510                 return -ENOMEM;
4511         }
4512
4513         return pci_register_driver(&ena_pci_driver);
4514 }
4515
4516 static void __exit ena_cleanup(void)
4517 {
4518         pci_unregister_driver(&ena_pci_driver);
4519
4520         if (ena_wq) {
4521                 destroy_workqueue(ena_wq);
4522                 ena_wq = NULL;
4523         }
4524 }
4525
4526 /******************************************************************************
4527  ******************************** AENQ Handlers *******************************
4528  *****************************************************************************/
4529 /* ena_update_on_link_change:
4530  * Notify the network interface about the change in link status
4531  */
4532 static void ena_update_on_link_change(void *adapter_data,
4533                                       struct ena_admin_aenq_entry *aenq_e)
4534 {
4535         struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4536         struct ena_admin_aenq_link_change_desc *aenq_desc =
4537                 (struct ena_admin_aenq_link_change_desc *)aenq_e;
4538         int status = aenq_desc->flags &
4539                 ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
4540
4541         if (status) {
4542                 netdev_dbg(adapter->netdev, "%s\n", __func__);
4543                 set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
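                /* Don't turn the carrier on while a reset is in progress;
                 * the carrier state is re-evaluated when the reset flow
                 * brings the interface back up.
                 */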
4544                 if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
4545                         netif_carrier_on(adapter->netdev);
4546         } else {
4547                 clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4548                 netif_carrier_off(adapter->netdev);
4549         }
4550 }
4551
4552 static void ena_keep_alive_wd(void *adapter_data,
4553                               struct ena_admin_aenq_entry *aenq_e)
4554 {
4555         struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4556         struct ena_admin_aenq_keep_alive_desc *desc;
4557         u64 rx_drops;
4558         u64 tx_drops;
4559
4560         desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
4561         adapter->last_keep_alive_jiffies = jiffies;
4562
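        /* The device reports the drop counters split into 32-bit high and
         * low halves; recombine them into 64-bit values.
         */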
4563         rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
4564         tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
4565
4566         u64_stats_update_begin(&adapter->syncp);
4567         adapter->dev_stats.rx_drops = rx_drops;
4568         adapter->dev_stats.tx_drops = tx_drops;
4569         u64_stats_update_end(&adapter->syncp);
4570 }
4571
4572 static void ena_notification(void *adapter_data,
4573                              struct ena_admin_aenq_entry *aenq_e)
4574 {
4575         struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4576         struct ena_admin_ena_hw_hints *hints;
4577
4578         WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
4579              "Invalid group(%x) expected %x\n",
4580              aenq_e->aenq_common_desc.group,
4581              ENA_ADMIN_NOTIFICATION);
4582
4583         switch (aenq_e->aenq_common_desc.syndrom) {
4584         case ENA_ADMIN_UPDATE_HINTS:
4585                 hints = (struct ena_admin_ena_hw_hints *)
4586                         (&aenq_e->inline_data_w4);
4587                 ena_update_hints(adapter, hints);
4588                 break;
4589         default:
4590                 netif_err(adapter, drv, adapter->netdev,
4591                           "Invalid aenq notification syndrome %d\n",
4592                           aenq_e->aenq_common_desc.syndrom);
4593         }
4594 }
4595
4596 /* This handler will be called for an unknown event group or unimplemented handlers */
4597 static void unimplemented_aenq_handler(void *data,
4598                                        struct ena_admin_aenq_entry *aenq_e)
4599 {
4600         struct ena_adapter *adapter = (struct ena_adapter *)data;
4601
4602         netif_err(adapter, drv, adapter->netdev,
4603                   "Unknown event was received or event with unimplemented handler\n");
4604 }
4605
4606 static struct ena_aenq_handlers aenq_handlers = {
4607         .handlers = {
4608                 [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4609                 [ENA_ADMIN_NOTIFICATION] = ena_notification,
4610                 [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4611         },
4612         .unimplemented_handler = unimplemented_aenq_handler
4613 };
4614
4615 module_init(ena_init);
4616 module_exit(ena_cleanup);