c5ad7a4f4d836b41c06e747438006a7d9c71c8a6
platform/kernel/linux-rpi.git: drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, 0444);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, 0444);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
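/* Example usage (module name assumed to be be2net, matching the CONFIG_BE2NET_*
 * options below): modprobe be2net rx_frag_size=4096
 */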
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50 #ifdef CONFIG_BE2NET_BE2
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 #endif /* CONFIG_BE2NET_BE2 */
54 #ifdef CONFIG_BE2NET_BE3
55         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
56         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
57 #endif /* CONFIG_BE2NET_BE3 */
58 #ifdef CONFIG_BE2NET_LANCER
59         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
60         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
61 #endif /* CONFIG_BE2NET_LANCER */
62 #ifdef CONFIG_BE2NET_SKYHAWK
63         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
64         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
65 #endif /* CONFIG_BE2NET_SKYHAWK */
66         { 0 }
67 };
68 MODULE_DEVICE_TABLE(pci, be_dev_ids);
69
70 /* Workqueue used by all functions for deferring cmd calls to the adapter */
71 static struct workqueue_struct *be_wq;
72
73 /* UE Status Low CSR */
74 static const char * const ue_status_low_desc[] = {
75         "CEV",
76         "CTX",
77         "DBUF",
78         "ERX",
79         "Host",
80         "MPU",
81         "NDMA",
82         "PTC ",
83         "RDMA ",
84         "RXF ",
85         "RXIPS ",
86         "RXULP0 ",
87         "RXULP1 ",
88         "RXULP2 ",
89         "TIM ",
90         "TPOST ",
91         "TPRE ",
92         "TXIPS ",
93         "TXULP0 ",
94         "TXULP1 ",
95         "UC ",
96         "WDMA ",
97         "TXULP2 ",
98         "HOST1 ",
99         "P0_OB_LINK ",
100         "P1_OB_LINK ",
101         "HOST_GPIO ",
102         "MBOX ",
103         "ERX2 ",
104         "SPARE ",
105         "JTAG ",
106         "MPU_INTPEND "
107 };
108
109 /* UE Status High CSR */
110 static const char * const ue_status_hi_desc[] = {
111         "LPCMEMHOST",
112         "MGMT_MAC",
113         "PCS0ONLINE",
114         "MPU_IRAM",
115         "PCS1ONLINE",
116         "PCTL0",
117         "PCTL1",
118         "PMEM",
119         "RR",
120         "TXPB",
121         "RXPP",
122         "XAUI",
123         "TXP",
124         "ARM",
125         "IPC",
126         "HOST2",
127         "HOST3",
128         "HOST4",
129         "HOST5",
130         "HOST6",
131         "HOST7",
132         "ECRC",
133         "Poison TLP",
134         "NETC",
135         "PERIPH",
136         "LLTXULP",
137         "D2P",
138         "RCON",
139         "LDMA",
140         "LLTXP",
141         "LLTXPB",
142         "Unknown"
143 };
144
145 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
146                                  BE_IF_FLAGS_BROADCAST | \
147                                  BE_IF_FLAGS_MULTICAST | \
148                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
149
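/* Queue (ring) memory helpers: each be_queue_info is backed by a single
 * DMA-coherent buffer of len * entry_size bytes.
 */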
150 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
151 {
152         struct be_dma_mem *mem = &q->dma_mem;
153
154         if (mem->va) {
155                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
156                                   mem->dma);
157                 mem->va = NULL;
158         }
159 }
160
161 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
162                           u16 len, u16 entry_size)
163 {
164         struct be_dma_mem *mem = &q->dma_mem;
165
166         memset(q, 0, sizeof(*q));
167         q->len = len;
168         q->entry_size = entry_size;
169         mem->size = len * entry_size;
170         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
171                                       GFP_KERNEL);
172         if (!mem->va)
173                 return -ENOMEM;
174         return 0;
175 }
176
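/* Enable/disable host interrupt delivery by toggling the HOSTINTR bit of the
 * MEMBAR control register in PCI config space.
 */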
177 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
178 {
179         u32 reg, enabled;
180
181         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
182                               &reg);
183         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184
185         if (!enabled && enable)
186                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
187         else if (enabled && !enable)
188                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
189         else
190                 return;
191
192         pci_write_config_dword(adapter->pdev,
193                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
194 }
195
196 static void be_intr_set(struct be_adapter *adapter, bool enable)
197 {
198         int status = 0;
199
200         /* On lancer interrupts can't be controlled via this register */
201         if (lancer_chip(adapter))
202                 return;
203
204         if (be_check_error(adapter, BE_ERROR_EEH))
205                 return;
206
207         status = be_cmd_intr_set(adapter, enable);
208         if (status)
209                 be_reg_intr_set(adapter, enable);
210 }
211
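/* Doorbell helpers: each writes a queue id along with the count of newly
 * posted (RQ/TXQ) or processed (EQ/CQ) entries to that queue's doorbell
 * register.
 */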
212 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
213 {
214         u32 val = 0;
215
216         if (be_check_error(adapter, BE_ERROR_HW))
217                 return;
218
219         val |= qid & DB_RQ_RING_ID_MASK;
220         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
221
222         wmb();
223         iowrite32(val, adapter->db + DB_RQ_OFFSET);
224 }
225
226 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
227                           u16 posted)
228 {
229         u32 val = 0;
230
231         if (be_check_error(adapter, BE_ERROR_HW))
232                 return;
233
234         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
235         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
236
237         wmb();
238         iowrite32(val, adapter->db + txo->db_offset);
239 }
240
241 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
242                          bool arm, bool clear_int, u16 num_popped,
243                          u32 eq_delay_mult_enc)
244 {
245         u32 val = 0;
246
247         val |= qid & DB_EQ_RING_ID_MASK;
248         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
249
250         if (be_check_error(adapter, BE_ERROR_HW))
251                 return;
252
253         if (arm)
254                 val |= 1 << DB_EQ_REARM_SHIFT;
255         if (clear_int)
256                 val |= 1 << DB_EQ_CLR_SHIFT;
257         val |= 1 << DB_EQ_EVNT_SHIFT;
258         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
259         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
260         iowrite32(val, adapter->db + DB_EQ_OFFSET);
261 }
262
263 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
264 {
265         u32 val = 0;
266
267         val |= qid & DB_CQ_RING_ID_MASK;
268         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
269                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
270
271         if (be_check_error(adapter, BE_ERROR_HW))
272                 return;
273
274         if (arm)
275                 val |= 1 << DB_CQ_REARM_SHIFT;
276         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
277         iowrite32(val, adapter->db + DB_CQ_OFFSET);
278 }
279
280 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
281 {
282         int i;
283
284         /* Check if mac has already been added as part of uc-list */
285         for (i = 0; i < adapter->uc_macs; i++) {
286                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
287                         /* mac already added, skip addition */
288                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
289                         return 0;
290                 }
291         }
292
293         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
294                                &adapter->pmac_id[0], 0);
295 }
296
297 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
298 {
299         int i;
300
301         /* Skip deletion if the programmed mac is
302          * being used in uc-list
303          */
304         for (i = 0; i < adapter->uc_macs; i++) {
305                 if (adapter->pmac_id[i + 1] == pmac_id)
306                         return;
307         }
308         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
309 }
310
311 static int be_mac_addr_set(struct net_device *netdev, void *p)
312 {
313         struct be_adapter *adapter = netdev_priv(netdev);
314         struct device *dev = &adapter->pdev->dev;
315         struct sockaddr *addr = p;
316         int status;
317         u8 mac[ETH_ALEN];
318         u32 old_pmac_id = adapter->pmac_id[0];
319
320         if (!is_valid_ether_addr(addr->sa_data))
321                 return -EADDRNOTAVAIL;
322
323         /* Proceed further only if the user-provided MAC is different
324          * from the active MAC
325          */
326         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
327                 return 0;
328
329         /* BE3 VFs without the FILTMGMT privilege are not allowed to set
330          * their own MAC address
331          */
332         if (BEx_chip(adapter) && be_virtfn(adapter) &&
333             !check_privilege(adapter, BE_PRIV_FILTMGMT))
334                 return -EPERM;
335
336         /* if device is not running, copy MAC to netdev->dev_addr */
337         if (!netif_running(netdev))
338                 goto done;
339
340         /* The PMAC_ADD cmd may fail if the VF doesn't have the FILTMGMT
341          * privilege or if the PF did not provision the new MAC address.
342          * On BE3, this cmd will always fail if the VF doesn't have the
343          * FILTMGMT privilege. This failure is OK only if the PF programmed
344          * the MAC for the VF.
345          */
346         mutex_lock(&adapter->rx_filter_lock);
347         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
348         if (!status) {
349
350                 /* Delete the old programmed MAC. This call may fail if the
351                  * old MAC was already deleted by the PF driver.
352                  */
353                 if (adapter->pmac_id[0] != old_pmac_id)
354                         be_dev_mac_del(adapter, old_pmac_id);
355         }
356
357         mutex_unlock(&adapter->rx_filter_lock);
358         /* Decide if the new MAC is successfully activated only after
359          * querying the FW
360          */
361         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
362                                        adapter->if_handle, true, 0);
363         if (status)
364                 goto err;
365
366         /* The MAC change did not happen, either due to lack of privilege
367          * or because the PF didn't pre-provision the new MAC.
368          */
369         if (!ether_addr_equal(addr->sa_data, mac)) {
370                 status = -EPERM;
371                 goto err;
372         }
373
374         /* Remember currently programmed MAC */
375         ether_addr_copy(adapter->dev_mac, addr->sa_data);
376 done:
377         ether_addr_copy(netdev->dev_addr, addr->sa_data);
378         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
379         return 0;
380 err:
381         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
382         return status;
383 }
384
385 /* BE2 supports only v0 cmd */
386 static void *hw_stats_from_cmd(struct be_adapter *adapter)
387 {
388         if (BE2_chip(adapter)) {
389                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         } else if (BE3_chip(adapter)) {
393                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
394
395                 return &cmd->hw_stats;
396         } else {
397                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
398
399                 return &cmd->hw_stats;
400         }
401 }
402
403 /* BE2 supports only v0 cmd */
404 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
405 {
406         if (BE2_chip(adapter)) {
407                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         } else if (BE3_chip(adapter)) {
411                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
412
413                 return &hw_stats->erx;
414         } else {
415                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
416
417                 return &hw_stats->erx;
418         }
419 }
420
421 static void populate_be_v0_stats(struct be_adapter *adapter)
422 {
423         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
424         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
425         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
426         struct be_port_rxf_stats_v0 *port_stats =
427                                         &rxf_stats->port[adapter->port_num];
428         struct be_drv_stats *drvs = &adapter->drv_stats;
429
430         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
431         drvs->rx_pause_frames = port_stats->rx_pause_frames;
432         drvs->rx_crc_errors = port_stats->rx_crc_errors;
433         drvs->rx_control_frames = port_stats->rx_control_frames;
434         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
435         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
436         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
437         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
438         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
439         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
440         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
441         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
442         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
443         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
444         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
445         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
446         drvs->rx_dropped_header_too_small =
447                 port_stats->rx_dropped_header_too_small;
448         drvs->rx_address_filtered =
449                                         port_stats->rx_address_filtered +
450                                         port_stats->rx_vlan_filtered;
451         drvs->rx_alignment_symbol_errors =
452                 port_stats->rx_alignment_symbol_errors;
453
454         drvs->tx_pauseframes = port_stats->tx_pauseframes;
455         drvs->tx_controlframes = port_stats->tx_controlframes;
456
457         if (adapter->port_num)
458                 drvs->jabber_events = rxf_stats->port1_jabber_events;
459         else
460                 drvs->jabber_events = rxf_stats->port0_jabber_events;
461         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
462         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
463         drvs->forwarded_packets = rxf_stats->forwarded_packets;
464         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
465         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
466         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
467         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
468 }
469
470 static void populate_be_v1_stats(struct be_adapter *adapter)
471 {
472         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
473         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
474         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
475         struct be_port_rxf_stats_v1 *port_stats =
476                                         &rxf_stats->port[adapter->port_num];
477         struct be_drv_stats *drvs = &adapter->drv_stats;
478
479         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
480         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
481         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
482         drvs->rx_pause_frames = port_stats->rx_pause_frames;
483         drvs->rx_crc_errors = port_stats->rx_crc_errors;
484         drvs->rx_control_frames = port_stats->rx_control_frames;
485         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
486         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
487         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
488         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
489         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
490         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
491         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
492         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
493         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
494         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
495         drvs->rx_dropped_header_too_small =
496                 port_stats->rx_dropped_header_too_small;
497         drvs->rx_input_fifo_overflow_drop =
498                 port_stats->rx_input_fifo_overflow_drop;
499         drvs->rx_address_filtered = port_stats->rx_address_filtered;
500         drvs->rx_alignment_symbol_errors =
501                 port_stats->rx_alignment_symbol_errors;
502         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
503         drvs->tx_pauseframes = port_stats->tx_pauseframes;
504         drvs->tx_controlframes = port_stats->tx_controlframes;
505         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
506         drvs->jabber_events = port_stats->jabber_events;
507         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
508         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
509         drvs->forwarded_packets = rxf_stats->forwarded_packets;
510         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
511         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
512         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
513         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
514 }
515
516 static void populate_be_v2_stats(struct be_adapter *adapter)
517 {
518         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
519         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
520         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
521         struct be_port_rxf_stats_v2 *port_stats =
522                                         &rxf_stats->port[adapter->port_num];
523         struct be_drv_stats *drvs = &adapter->drv_stats;
524
525         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
526         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
527         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
528         drvs->rx_pause_frames = port_stats->rx_pause_frames;
529         drvs->rx_crc_errors = port_stats->rx_crc_errors;
530         drvs->rx_control_frames = port_stats->rx_control_frames;
531         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
532         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
533         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
534         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
535         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
536         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
537         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
538         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
539         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
540         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
541         drvs->rx_dropped_header_too_small =
542                 port_stats->rx_dropped_header_too_small;
543         drvs->rx_input_fifo_overflow_drop =
544                 port_stats->rx_input_fifo_overflow_drop;
545         drvs->rx_address_filtered = port_stats->rx_address_filtered;
546         drvs->rx_alignment_symbol_errors =
547                 port_stats->rx_alignment_symbol_errors;
548         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
549         drvs->tx_pauseframes = port_stats->tx_pauseframes;
550         drvs->tx_controlframes = port_stats->tx_controlframes;
551         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
552         drvs->jabber_events = port_stats->jabber_events;
553         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
554         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
555         drvs->forwarded_packets = rxf_stats->forwarded_packets;
556         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
557         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
558         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
559         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
560         if (be_roce_supported(adapter)) {
561                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
562                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
563                 drvs->rx_roce_frames = port_stats->roce_frames_received;
564                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
565                 drvs->roce_drops_payload_len =
566                         port_stats->roce_drops_payload_len;
567         }
568 }
569
570 static void populate_lancer_stats(struct be_adapter *adapter)
571 {
572         struct be_drv_stats *drvs = &adapter->drv_stats;
573         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
574
575         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
576         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
577         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
578         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
579         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
580         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
581         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
582         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
583         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
584         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
585         drvs->rx_dropped_tcp_length =
586                                 pport_stats->rx_dropped_invalid_tcp_length;
587         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
588         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
589         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
590         drvs->rx_dropped_header_too_small =
591                                 pport_stats->rx_dropped_header_too_small;
592         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593         drvs->rx_address_filtered =
594                                         pport_stats->rx_address_filtered +
595                                         pport_stats->rx_vlan_filtered;
596         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
597         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
598         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
599         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
600         drvs->jabber_events = pport_stats->rx_jabbers;
601         drvs->forwarded_packets = pport_stats->num_forwards_lo;
602         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
603         drvs->rx_drops_too_many_frags =
604                                 pport_stats->rx_drops_too_many_frags_lo;
605 }
606
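/* Fold a 16-bit HW counter that wraps at 65535 into a 32-bit accumulator:
 * the low 16 bits mirror the latest HW sample, the high 16 bits count wraps.
 */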
607 static void accumulate_16bit_val(u32 *acc, u16 val)
608 {
609 #define lo(x)                   (x & 0xFFFF)
610 #define hi(x)                   (x & 0xFFFF0000)
611         bool wrapped = val < lo(*acc);
612         u32 newacc = hi(*acc) + val;
613
614         if (wrapped)
615                 newacc += 65536;
616         WRITE_ONCE(*acc, newacc);
617 }
618
619 static void populate_erx_stats(struct be_adapter *adapter,
620                                struct be_rx_obj *rxo, u32 erx_stat)
621 {
622         if (!BEx_chip(adapter))
623                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
624         else
625                 /* the erx HW counter below wraps around after 65535;
626                  * the driver accumulates it into a 32-bit value
627                  */
628                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
629                                      (u16)erx_stat);
630 }
631
632 void be_parse_stats(struct be_adapter *adapter)
633 {
634         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
635         struct be_rx_obj *rxo;
636         int i;
637         u32 erx_stat;
638
639         if (lancer_chip(adapter)) {
640                 populate_lancer_stats(adapter);
641         } else {
642                 if (BE2_chip(adapter))
643                         populate_be_v0_stats(adapter);
644                 else if (BE3_chip(adapter))
645                         /* for BE3 */
646                         populate_be_v1_stats(adapter);
647                 else
648                         populate_be_v2_stats(adapter);
649
650                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
651                 for_all_rx_queues(adapter, rxo, i) {
652                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
653                         populate_erx_stats(adapter, rxo, erx_stat);
654                 }
655         }
656 }
657
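/* ndo_get_stats64 handler: sum the per-queue SW counters (read under
 * u64_stats sync) and fill in error counters from the FW-parsed drv_stats.
 */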
658 static void be_get_stats64(struct net_device *netdev,
659                            struct rtnl_link_stats64 *stats)
660 {
661         struct be_adapter *adapter = netdev_priv(netdev);
662         struct be_drv_stats *drvs = &adapter->drv_stats;
663         struct be_rx_obj *rxo;
664         struct be_tx_obj *txo;
665         u64 pkts, bytes;
666         unsigned int start;
667         int i;
668
669         for_all_rx_queues(adapter, rxo, i) {
670                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
671
672                 do {
673                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
674                         pkts = rx_stats(rxo)->rx_pkts;
675                         bytes = rx_stats(rxo)->rx_bytes;
676                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
677                 stats->rx_packets += pkts;
678                 stats->rx_bytes += bytes;
679                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
680                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
681                                         rx_stats(rxo)->rx_drops_no_frags;
682         }
683
684         for_all_tx_queues(adapter, txo, i) {
685                 const struct be_tx_stats *tx_stats = tx_stats(txo);
686
687                 do {
688                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
689                         pkts = tx_stats(txo)->tx_pkts;
690                         bytes = tx_stats(txo)->tx_bytes;
691                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
692                 stats->tx_packets += pkts;
693                 stats->tx_bytes += bytes;
694         }
695
696         /* bad pkts received */
697         stats->rx_errors = drvs->rx_crc_errors +
698                 drvs->rx_alignment_symbol_errors +
699                 drvs->rx_in_range_errors +
700                 drvs->rx_out_range_errors +
701                 drvs->rx_frame_too_long +
702                 drvs->rx_dropped_too_small +
703                 drvs->rx_dropped_too_short +
704                 drvs->rx_dropped_header_too_small +
705                 drvs->rx_dropped_tcp_length +
706                 drvs->rx_dropped_runt;
707
708         /* detailed rx errors */
709         stats->rx_length_errors = drvs->rx_in_range_errors +
710                 drvs->rx_out_range_errors +
711                 drvs->rx_frame_too_long;
712
713         stats->rx_crc_errors = drvs->rx_crc_errors;
714
715         /* frame alignment errors */
716         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
717
718         /* receiver fifo overrun */
719         /* drops_no_pbuf is not per interface; it's per BE card */
720         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
721                                 drvs->rx_input_fifo_overflow_drop +
722                                 drvs->rx_drops_no_pbuf;
723 }
724
725 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
726 {
727         struct net_device *netdev = adapter->netdev;
728
729         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
730                 netif_carrier_off(netdev);
731                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
732         }
733
734         if (link_status)
735                 netif_carrier_on(netdev);
736         else
737                 netif_carrier_off(netdev);
738
739         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
740 }
741
742 static int be_gso_hdr_len(struct sk_buff *skb)
743 {
744         if (skb->encapsulation)
745                 return skb_inner_transport_offset(skb) +
746                        inner_tcp_hdrlen(skb);
747         return skb_transport_offset(skb) + tcp_hdrlen(skb);
748 }
749
750 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
751 {
752         struct be_tx_stats *stats = tx_stats(txo);
753         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
754         /* Account for headers which get duplicated in TSO pkt */
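        /* e.g. a GSO skb split into 3 segments carries 2 extra copies of the
         * L2..L4 header, so 2 * be_gso_hdr_len() bytes are added to tx_bytes.
         */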
755         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
756
757         u64_stats_update_begin(&stats->sync);
758         stats->tx_reqs++;
759         stats->tx_bytes += skb->len + dup_hdr_len;
760         stats->tx_pkts += tx_pkts;
761         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
762                 stats->tx_vxlan_offload_pkts += tx_pkts;
763         u64_stats_update_end(&stats->sync);
764 }
765
766 /* Returns number of WRBs needed for the skb */
767 static u32 skb_wrb_cnt(struct sk_buff *skb)
768 {
769         /* +1 for the header wrb */
770         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
771 }
772
773 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
774 {
775         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
776         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
777         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
778         wrb->rsvd0 = 0;
779 }
780
781 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
782  * to avoid the swap and shift/mask operations in wrb_fill().
783  */
784 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
785 {
786         wrb->frag_pa_hi = 0;
787         wrb->frag_pa_lo = 0;
788         wrb->frag_len = 0;
789         wrb->rsvd0 = 0;
790 }
791
792 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
793                                      struct sk_buff *skb)
794 {
795         u8 vlan_prio;
796         u16 vlan_tag;
797
798         vlan_tag = skb_vlan_tag_get(skb);
799         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
800         /* If vlan priority provided by OS is NOT in available bmap */
801         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
802                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
803                                 adapter->recommended_prio_bits;
804
805         return vlan_tag;
806 }
807
808 /* Used only for IP tunnel packets */
809 static u16 skb_inner_ip_proto(struct sk_buff *skb)
810 {
811         return (inner_ip_hdr(skb)->version == 4) ?
812                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
813 }
814
815 static u16 skb_ip_proto(struct sk_buff *skb)
816 {
817         return (ip_hdr(skb)->version == 4) ?
818                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
819 }
820
821 static inline bool be_is_txq_full(struct be_tx_obj *txo)
822 {
823         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
824 }
825
826 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
827 {
828         return atomic_read(&txo->q.used) < txo->q.len / 2;
829 }
830
831 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
832 {
833         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
834 }
835
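/* Translate the skb's offload requests (TSO/GSO, L3/L4 checksum, VLAN) into
 * the WRB feature flags used to build the header WRB.
 */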
836 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
837                                        struct sk_buff *skb,
838                                        struct be_wrb_params *wrb_params)
839 {
840         u16 proto;
841
842         if (skb_is_gso(skb)) {
843                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
844                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
845                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
846                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
847         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
848                 if (skb->encapsulation) {
849                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
850                         proto = skb_inner_ip_proto(skb);
851                 } else {
852                         proto = skb_ip_proto(skb);
853                 }
854                 if (proto == IPPROTO_TCP)
855                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
856                 else if (proto == IPPROTO_UDP)
857                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
858         }
859
860         if (skb_vlan_tag_present(skb)) {
861                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
862                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
863         }
864
865         BE_WRB_F_SET(wrb_params->features, CRC, 1);
866 }
867
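/* Build the per-packet header WRB from the previously computed wrb_params */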
868 static void wrb_fill_hdr(struct be_adapter *adapter,
869                          struct be_eth_hdr_wrb *hdr,
870                          struct be_wrb_params *wrb_params,
871                          struct sk_buff *skb)
872 {
873         memset(hdr, 0, sizeof(*hdr));
874
875         SET_TX_WRB_HDR_BITS(crc, hdr,
876                             BE_WRB_F_GET(wrb_params->features, CRC));
877         SET_TX_WRB_HDR_BITS(ipcs, hdr,
878                             BE_WRB_F_GET(wrb_params->features, IPCS));
879         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
880                             BE_WRB_F_GET(wrb_params->features, TCPCS));
881         SET_TX_WRB_HDR_BITS(udpcs, hdr,
882                             BE_WRB_F_GET(wrb_params->features, UDPCS));
883
884         SET_TX_WRB_HDR_BITS(lso, hdr,
885                             BE_WRB_F_GET(wrb_params->features, LSO));
886         SET_TX_WRB_HDR_BITS(lso6, hdr,
887                             BE_WRB_F_GET(wrb_params->features, LSO6));
888         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
889
890         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
891          * hack is not needed, the evt bit is set while ringing DB.
892          */
893         SET_TX_WRB_HDR_BITS(event, hdr,
894                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
895         SET_TX_WRB_HDR_BITS(vlan, hdr,
896                             BE_WRB_F_GET(wrb_params->features, VLAN));
897         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
898
899         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
900         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
901         SET_TX_WRB_HDR_BITS(mgmt, hdr,
902                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
903 }
904
905 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
906                           bool unmap_single)
907 {
908         dma_addr_t dma;
909         u32 frag_len = le32_to_cpu(wrb->frag_len);
910
911
912         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
913                 (u64)le32_to_cpu(wrb->frag_pa_lo);
914         if (frag_len) {
915                 if (unmap_single)
916                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
917                 else
918                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
919         }
920 }
921
922 /* Grab a WRB header for xmit */
923 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
924 {
925         u32 head = txo->q.head;
926
927         queue_head_inc(&txo->q);
928         return head;
929 }
930
931 /* Set up the WRB header for xmit */
932 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
933                                 struct be_tx_obj *txo,
934                                 struct be_wrb_params *wrb_params,
935                                 struct sk_buff *skb, u16 head)
936 {
937         u32 num_frags = skb_wrb_cnt(skb);
938         struct be_queue_info *txq = &txo->q;
939         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
940
941         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
942         be_dws_cpu_to_le(hdr, sizeof(*hdr));
943
944         BUG_ON(txo->sent_skb_list[head]);
945         txo->sent_skb_list[head] = skb;
946         txo->last_req_hdr = head;
947         atomic_add(num_frags, &txq->used);
948         txo->last_req_wrb_cnt = num_frags;
949         txo->pend_wrb_cnt += num_frags;
950 }
951
952 /* Setup a WRB fragment (buffer descriptor) for xmit */
953 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
954                                  int len)
955 {
956         struct be_eth_wrb *wrb;
957         struct be_queue_info *txq = &txo->q;
958
959         wrb = queue_head_node(txq);
960         wrb_fill(wrb, busaddr, len);
961         queue_head_inc(txq);
962 }
963
964 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
965  * was invoked. The producer index is restored to the previous packet and the
966  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
967  */
968 static void be_xmit_restore(struct be_adapter *adapter,
969                             struct be_tx_obj *txo, u32 head, bool map_single,
970                             u32 copied)
971 {
972         struct device *dev;
973         struct be_eth_wrb *wrb;
974         struct be_queue_info *txq = &txo->q;
975
976         dev = &adapter->pdev->dev;
977         txq->head = head;
978
979         /* skip the first wrb (hdr); it's not mapped */
980         queue_head_inc(txq);
981         while (copied) {
982                 wrb = queue_head_node(txq);
983                 unmap_tx_frag(dev, wrb, map_single);
984                 map_single = false;
985                 copied -= le32_to_cpu(wrb->frag_len);
986                 queue_head_inc(txq);
987         }
988
989         txq->head = head;
990 }
991
992 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
993  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
994  * of WRBs used up by the packet.
995  */
996 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
997                            struct sk_buff *skb,
998                            struct be_wrb_params *wrb_params)
999 {
1000         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
1001         struct device *dev = &adapter->pdev->dev;
1002         bool map_single = false;
1003         u32 head;
1004         dma_addr_t busaddr;
1005         int len;
1006
1007         head = be_tx_get_wrb_hdr(txo);
1008
1009         if (skb->len > skb->data_len) {
1010                 len = skb_headlen(skb);
1011
1012                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1013                 if (dma_mapping_error(dev, busaddr))
1014                         goto dma_err;
1015                 map_single = true;
1016                 be_tx_setup_wrb_frag(txo, busaddr, len);
1017                 copied += len;
1018         }
1019
1020         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1021                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1022                 len = skb_frag_size(frag);
1023
1024                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1025                 if (dma_mapping_error(dev, busaddr))
1026                         goto dma_err;
1027                 be_tx_setup_wrb_frag(txo, busaddr, len);
1028                 copied += len;
1029         }
1030
1031         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1032
1033         be_tx_stats_update(txo, skb);
1034         return wrb_cnt;
1035
1036 dma_err:
1037         adapter->drv_stats.dma_map_errors++;
1038         be_xmit_restore(adapter, txo, head, map_single, copied);
1039         return 0;
1040 }
1041
1042 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1043 {
1044         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1045 }
1046
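/* Insert the VLAN tag (and the outer QnQ tag, if any) into the packet data
 * itself; used when HW VLAN insertion must be skipped.
 */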
1047 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1048                                              struct sk_buff *skb,
1049                                              struct be_wrb_params
1050                                              *wrb_params)
1051 {
1052         u16 vlan_tag = 0;
1053
1054         skb = skb_share_check(skb, GFP_ATOMIC);
1055         if (unlikely(!skb))
1056                 return skb;
1057
1058         if (skb_vlan_tag_present(skb))
1059                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1060
1061         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1062                 if (!vlan_tag)
1063                         vlan_tag = adapter->pvid;
1064                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1065                  * to skip VLAN insertion
1066                  */
1067                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068         }
1069
1070         if (vlan_tag) {
1071                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072                                                 vlan_tag);
1073                 if (unlikely(!skb))
1074                         return skb;
1075                 skb->vlan_tci = 0;
1076         }
1077
1078         /* Insert the outer VLAN, if any */
1079         if (adapter->qnq_vid) {
1080                 vlan_tag = adapter->qnq_vid;
1081                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082                                                 vlan_tag);
1083                 if (unlikely(!skb))
1084                         return skb;
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086         }
1087
1088         return skb;
1089 }
1090
1091 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092 {
1093         struct ethhdr *eh = (struct ethhdr *)skb->data;
1094         u16 offset = ETH_HLEN;
1095
1096         if (eh->h_proto == htons(ETH_P_IPV6)) {
1097                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099                 offset += sizeof(struct ipv6hdr);
1100                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1101                     ip6h->nexthdr != NEXTHDR_UDP) {
1102                         struct ipv6_opt_hdr *ehdr =
1103                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106                         if (ehdr->hdrlen == 0xff)
1107                                 return true;
1108                 }
1109         }
1110         return false;
1111 }
1112
1113 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114 {
1115         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116 }
1117
1118 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119 {
1120         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121 }
1122
1123 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124                                                   struct sk_buff *skb,
1125                                                   struct be_wrb_params
1126                                                   *wrb_params)
1127 {
1128         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1129         unsigned int eth_hdr_len;
1130         struct iphdr *ip;
1131
1132         /* For padded packets, BE HW modifies the tot_len field in the IP
1133          * header incorrectly when the VLAN tag is inserted by HW.
1134          * For padded packets, Lancer computes an incorrect checksum.
1135          */
1136         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137                                                 VLAN_ETH_HLEN : ETH_HLEN;
1138         if (skb->len <= 60 &&
1139             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1140             is_ipv4_pkt(skb)) {
1141                 ip = (struct iphdr *)ip_hdr(skb);
1142                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1143         }
1144
1145         /* If vlan tag is already inlined in the packet, skip HW VLAN
1146          * tagging in pvid-tagging mode
1147          */
1148         if (be_pvid_tagging_enabled(adapter) &&
1149             veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152         /* HW has a bug wherein it will calculate CSUM for VLAN
1153          * pkts even though checksum offload is disabled.
1154          * Manually insert the VLAN tag in the pkt instead.
1155          */
1156         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157             skb_vlan_tag_present(skb)) {
1158                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                 if (unlikely(!skb))
1160                         goto err;
1161         }
1162
1163         /* HW may lock up when VLAN HW tagging is requested on
1164          * certain ipv6 packets. Drop such pkts if the HW workaround to
1165          * skip HW tagging is not enabled by FW.
1166          */
1167         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                      (adapter->pvid || adapter->qnq_vid) &&
1169                      !qnq_async_evt_rcvd(adapter)))
1170                 goto tx_drop;
1171
1172         /* Manual VLAN tag insertion to prevent:
1173          * ASIC lockup when the ASIC inserts VLAN tag into
1174          * certain ipv6 packets. Insert VLAN tags in driver,
1175          * and set event, completion, vlan bits accordingly
1176          * in the Tx WRB.
1177          */
1178         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179             be_vlan_tag_tx_chk(adapter, skb)) {
1180                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                 if (unlikely(!skb))
1182                         goto err;
1183         }
1184
1185         return skb;
1186 tx_drop:
1187         dev_kfree_skb_any(skb);
1188 err:
1189         return NULL;
1190 }
1191
1192 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                            struct sk_buff *skb,
1194                                            struct be_wrb_params *wrb_params)
1195 {
1196         int err;
1197
1198         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199          * packets that are 32 bytes or less may cause a transmit stall
1200          * on that port. The workaround is to pad such packets
1201          * (len <= 32 bytes) to a minimum length of 36 bytes.
1202          */
1203         if (skb->len <= 32) {
1204                 if (skb_put_padto(skb, 36))
1205                         return NULL;
1206         }
1207
1208         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                 if (!skb)
1211                         return NULL;
1212         }
1213
1214         /* The stack can send us skbs with length greater than
1215          * what the HW can handle. Trim the extra bytes.
1216          */
1217         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219         WARN_ON(err);
1220
1221         return skb;
1222 }
1223
1224 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225 {
1226         struct be_queue_info *txq = &txo->q;
1227         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229         /* Mark the last request eventable if it hasn't been marked already */
1230         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1234         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                 wrb_fill_dummy(queue_head_node(txq));
1236                 queue_head_inc(txq);
1237                 atomic_inc(&txq->used);
1238                 txo->pend_wrb_cnt++;
1239                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                            TX_HDR_WRB_NUM_SHIFT);
1241                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                           TX_HDR_WRB_NUM_SHIFT);
1243         }
1244         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245         txo->pend_wrb_cnt = 0;
1246 }
1247
1248 /* OS2BMC related */
1249
1250 #define DHCP_CLIENT_PORT        68
1251 #define DHCP_SERVER_PORT        67
1252 #define NET_BIOS_PORT1          137
1253 #define NET_BIOS_PORT2          138
1254 #define DHCPV6_RAS_PORT         547
1255
1256 #define is_mc_allowed_on_bmc(adapter, eh)       \
1257         (!is_multicast_filt_enabled(adapter) && \
1258          is_multicast_ether_addr(eh->h_dest) && \
1259          !is_broadcast_ether_addr(eh->h_dest))
1260
1261 #define is_bc_allowed_on_bmc(adapter, eh)       \
1262         (!is_broadcast_filt_enabled(adapter) && \
1263          is_broadcast_ether_addr(eh->h_dest))
1264
1265 #define is_arp_allowed_on_bmc(adapter, skb)     \
1266         (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268 #define is_broadcast_packet(eh, adapter)        \
1269                 (is_multicast_ether_addr(eh->h_dest) && \
1270                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1271
1272 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1273
1274 #define is_arp_filt_enabled(adapter)    \
1275                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1276
1277 #define is_dhcp_client_filt_enabled(adapter)    \
1278                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1279
1280 #define is_dhcp_srvr_filt_enabled(adapter)      \
1281                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1282
1283 #define is_nbios_filt_enabled(adapter)  \
1284                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1285
1286 #define is_ipv6_na_filt_enabled(adapter)        \
1287                 (adapter->bmc_filt_mask &       \
1288                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1289
1290 #define is_ipv6_ra_filt_enabled(adapter)        \
1291                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1292
1293 #define is_ipv6_ras_filt_enabled(adapter)       \
1294                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1295
1296 #define is_broadcast_filt_enabled(adapter)      \
1297                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1298
1299 #define is_multicast_filt_enabled(adapter)      \
1300                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1301
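/* Decide whether a copy of this TX packet should also be sent to the BMC
 * (OS2BMC), based on the broadcast/multicast filter mask reported by FW.
 */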
1302 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1303                                struct sk_buff **skb)
1304 {
1305         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1306         bool os2bmc = false;
1307
1308         if (!be_is_os2bmc_enabled(adapter))
1309                 goto done;
1310
1311         if (!is_multicast_ether_addr(eh->h_dest))
1312                 goto done;
1313
1314         if (is_mc_allowed_on_bmc(adapter, eh) ||
1315             is_bc_allowed_on_bmc(adapter, eh) ||
1316             is_arp_allowed_on_bmc(adapter, (*skb))) {
1317                 os2bmc = true;
1318                 goto done;
1319         }
1320
1321         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1322                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1323                 u8 nexthdr = hdr->nexthdr;
1324
1325                 if (nexthdr == IPPROTO_ICMPV6) {
1326                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1327
1328                         switch (icmp6->icmp6_type) {
1329                         case NDISC_ROUTER_ADVERTISEMENT:
1330                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1331                                 goto done;
1332                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1333                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1334                                 goto done;
1335                         default:
1336                                 break;
1337                         }
1338                 }
1339         }
1340
1341         if (is_udp_pkt((*skb))) {
1342                 struct udphdr *udp = udp_hdr((*skb));
1343
1344                 switch (ntohs(udp->dest)) {
1345                 case DHCP_CLIENT_PORT:
1346                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1347                         goto done;
1348                 case DHCP_SERVER_PORT:
1349                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1350                         goto done;
1351                 case NET_BIOS_PORT1:
1352                 case NET_BIOS_PORT2:
1353                         os2bmc = is_nbios_filt_enabled(adapter);
1354                         goto done;
1355                 case DHCPV6_RAS_PORT:
1356                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1357                         goto done;
1358                 default:
1359                         break;
1360                 }
1361         }
1362 done:
1363         /* For VLAN packets destined to the BMC, the ASIC expects
1364          * the VLAN tag to be inline in the packet.
1365          */
1366         if (os2bmc)
1367                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1368
1369         return os2bmc;
1370 }
1371
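/* ndo_start_xmit handler: apply HW workarounds, enqueue the skb's WRBs and
 * ring the TX doorbell when no further xmit_more batching is pending.
 */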
1372 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1373 {
1374         struct be_adapter *adapter = netdev_priv(netdev);
1375         u16 q_idx = skb_get_queue_mapping(skb);
1376         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1377         struct be_wrb_params wrb_params = { 0 };
1378         bool flush = !skb->xmit_more;
1379         u16 wrb_cnt;
1380
1381         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1382         if (unlikely(!skb))
1383                 goto drop;
1384
1385         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1386
1387         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1388         if (unlikely(!wrb_cnt)) {
1389                 dev_kfree_skb_any(skb);
1390                 goto drop;
1391         }
1392
1393         /* If OS2BMC is enabled and the pkt is destined to the BMC,
1394          * enqueue the pkt a second time with the mgmt bit set.
1395          */
1396         if (be_send_pkt_to_bmc(adapter, &skb)) {
1397                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1398                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1399                 if (unlikely(!wrb_cnt))
1400                         goto drop;
1401                 else
1402                         skb_get(skb);
1403         }
1404
1405         if (be_is_txq_full(txo)) {
1406                 netif_stop_subqueue(netdev, q_idx);
1407                 tx_stats(txo)->tx_stops++;
1408         }
1409
1410         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1411                 be_xmit_flush(adapter, txo);
1412
1413         return NETDEV_TX_OK;
1414 drop:
1415         tx_stats(txo)->tx_drv_drops++;
1416         /* Flush the already enqueued tx requests */
1417         if (flush && txo->pend_wrb_cnt)
1418                 be_xmit_flush(adapter, txo);
1419
1420         return NETDEV_TX_OK;
1421 }
1422
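/* TX watchdog handler: dumps the non-zero entries of every TX queue and
 * its completion queue, along with basic L4 header details of any skbs
 * still pending on the sent list, to aid debugging. On Lancer chips a
 * firmware reset is also initiated.
 */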
1423 static void be_tx_timeout(struct net_device *netdev)
1424 {
1425         struct be_adapter *adapter = netdev_priv(netdev);
1426         struct device *dev = &adapter->pdev->dev;
1427         struct be_tx_obj *txo;
1428         struct sk_buff *skb;
1429         struct tcphdr *tcphdr;
1430         struct udphdr *udphdr;
1431         u32 *entry;
1432         int status;
1433         int i, j;
1434
1435         for_all_tx_queues(adapter, txo, i) {
1436                 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1437                          i, txo->q.head, txo->q.tail,
1438                          atomic_read(&txo->q.used), txo->q.id);
1439
1440                 entry = txo->q.dma_mem.va;
1441                 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1442                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1443                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1444                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1445                                          j, entry[j], entry[j + 1],
1446                                          entry[j + 2], entry[j + 3]);
1447                         }
1448                 }
1449
1450                 entry = txo->cq.dma_mem.va;
1451                 dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1452                          i, txo->cq.head, txo->cq.tail,
1453                          atomic_read(&txo->cq.used));
1454                 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1455                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1456                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1457                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1458                                          j, entry[j], entry[j + 1],
1459                                          entry[j + 2], entry[j + 3]);
1460                         }
1461                 }
1462
1463                 for (j = 0; j < TX_Q_LEN; j++) {
1464                         if (txo->sent_skb_list[j]) {
1465                                 skb = txo->sent_skb_list[j];
1466                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1467                                         tcphdr = tcp_hdr(skb);
1468                                         dev_info(dev, "TCP source port %d\n",
1469                                                  ntohs(tcphdr->source));
1470                                         dev_info(dev, "TCP dest port %d\n",
1471                                                  ntohs(tcphdr->dest));
1472                                         dev_info(dev, "TCP sequence num %u\n",
1473                                                  ntohl(tcphdr->seq));
1474                                         dev_info(dev, "TCP ack_seq %u\n",
1475                                                  ntohl(tcphdr->ack_seq));
1476                                 } else if (ip_hdr(skb)->protocol ==
1477                                            IPPROTO_UDP) {
1478                                         udphdr = udp_hdr(skb);
1479                                         dev_info(dev, "UDP source port %d\n",
1480                                                  ntohs(udphdr->source));
1481                                         dev_info(dev, "UDP dest port %d\n",
1482                                                  ntohs(udphdr->dest));
1483                                 }
1484                                 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1485                                          j, skb, skb->len, skb->protocol);
1486                         }
1487                 }
1488         }
1489
1490         if (lancer_chip(adapter)) {
1491                 dev_info(dev, "Initiating reset due to tx timeout\n");
1492                 dev_info(dev, "Resetting adapter\n");
1493                 status = lancer_physdev_ctrl(adapter,
1494                                              PHYSDEV_CONTROL_FW_RESET_MASK);
1495                 if (status)
1496                         dev_err(dev, "Reset failed .. Reboot server\n");
1497         }
1498 }
1499
1500 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1501 {
1502         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1503                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1504 }
1505
1506 static int be_set_vlan_promisc(struct be_adapter *adapter)
1507 {
1508         struct device *dev = &adapter->pdev->dev;
1509         int status;
1510
1511         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1512                 return 0;
1513
1514         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1515         if (!status) {
1516                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1517                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1518         } else {
1519                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1520         }
1521         return status;
1522 }
1523
1524 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1525 {
1526         struct device *dev = &adapter->pdev->dev;
1527         int status;
1528
1529         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1530         if (!status) {
1531                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1532                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1533         }
1534         return status;
1535 }
1536
1537 /*
1538  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1539  * If the user configures more, place BE in vlan promiscuous mode.
1540  */
1541 static int be_vid_config(struct be_adapter *adapter)
1542 {
1543         struct device *dev = &adapter->pdev->dev;
1544         u16 vids[BE_NUM_VLANS_SUPPORTED];
1545         u16 num = 0, i = 0;
1546         int status = 0;
1547
1548         /* No need to change the VLAN state if the I/F is in promiscuous */
1549         if (adapter->netdev->flags & IFF_PROMISC)
1550                 return 0;
1551
1552         if (adapter->vlans_added > be_max_vlans(adapter))
1553                 return be_set_vlan_promisc(adapter);
1554
1555         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1556                 status = be_clear_vlan_promisc(adapter);
1557                 if (status)
1558                         return status;
1559         }
1560         /* Construct VLAN Table to give to HW */
1561         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1562                 vids[num++] = cpu_to_le16(i);
1563
1564         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1565         if (status) {
1566                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1567                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1568                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1569                     addl_status(status) ==
1570                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1571                         return be_set_vlan_promisc(adapter);
1572         }
1573         return status;
1574 }
1575
1576 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1577 {
1578         struct be_adapter *adapter = netdev_priv(netdev);
1579         int status = 0;
1580
1581         mutex_lock(&adapter->rx_filter_lock);
1582
1583         /* Packets with VID 0 are always received by Lancer by default */
1584         if (lancer_chip(adapter) && vid == 0)
1585                 goto done;
1586
1587         if (test_bit(vid, adapter->vids))
1588                 goto done;
1589
1590         set_bit(vid, adapter->vids);
1591         adapter->vlans_added++;
1592
1593         status = be_vid_config(adapter);
1594 done:
1595         mutex_unlock(&adapter->rx_filter_lock);
1596         return status;
1597 }
1598
1599 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1600 {
1601         struct be_adapter *adapter = netdev_priv(netdev);
1602         int status = 0;
1603
1604         mutex_lock(&adapter->rx_filter_lock);
1605
1606         /* Packets with VID 0 are always received by Lancer by default */
1607         if (lancer_chip(adapter) && vid == 0)
1608                 goto done;
1609
1610         if (!test_bit(vid, adapter->vids))
1611                 goto done;
1612
1613         clear_bit(vid, adapter->vids);
1614         adapter->vlans_added--;
1615
1616         status = be_vid_config(adapter);
1617 done:
1618         mutex_unlock(&adapter->rx_filter_lock);
1619         return status;
1620 }
1621
1622 static void be_set_all_promisc(struct be_adapter *adapter)
1623 {
1624         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1625         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1626 }
1627
1628 static void be_set_mc_promisc(struct be_adapter *adapter)
1629 {
1630         int status;
1631
1632         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1633                 return;
1634
1635         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1636         if (!status)
1637                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1638 }
1639
1640 static void be_set_uc_promisc(struct be_adapter *adapter)
1641 {
1642         int status;
1643
1644         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1645                 return;
1646
1647         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1648         if (!status)
1649                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1650 }
1651
1652 static void be_clear_uc_promisc(struct be_adapter *adapter)
1653 {
1654         int status;
1655
1656         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1657                 return;
1658
1659         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1660         if (!status)
1661                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1662 }
1663
1664 /* The two functions below are the callback args for __dev_mc_sync/dev_uc_sync().
1665  * We use a single callback function for both sync and unsync. We don't really
1666  * add/remove addresses through this callback, but use it to detect changes
1667  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1668  */
1669 static int be_uc_list_update(struct net_device *netdev,
1670                              const unsigned char *addr)
1671 {
1672         struct be_adapter *adapter = netdev_priv(netdev);
1673
1674         adapter->update_uc_list = true;
1675         return 0;
1676 }
1677
1678 static int be_mc_list_update(struct net_device *netdev,
1679                              const unsigned char *addr)
1680 {
1681         struct be_adapter *adapter = netdev_priv(netdev);
1682
1683         adapter->update_mc_list = true;
1684         return 0;
1685 }
1686
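/* Program the multicast filter. If IFF_ALLMULTI is set or the list synced
 * from the net_device exceeds what the interface supports, fall back to
 * multicast promiscuous mode; otherwise cache the list in the adapter and
 * program it with an RX-filter command.
 */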
1687 static void be_set_mc_list(struct be_adapter *adapter)
1688 {
1689         struct net_device *netdev = adapter->netdev;
1690         struct netdev_hw_addr *ha;
1691         bool mc_promisc = false;
1692         int status;
1693
1694         netif_addr_lock_bh(netdev);
1695         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1696
1697         if (netdev->flags & IFF_PROMISC) {
1698                 adapter->update_mc_list = false;
1699         } else if (netdev->flags & IFF_ALLMULTI ||
1700                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1701                 /* Enable multicast promisc if num configured exceeds
1702                  * what we support
1703                  */
1704                 mc_promisc = true;
1705                 adapter->update_mc_list = false;
1706         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1707                 /* Update mc-list unconditionally if the iface was previously
1708                  * in mc-promisc mode and now is out of that mode.
1709                  */
1710                 adapter->update_mc_list = true;
1711         }
1712
1713         if (adapter->update_mc_list) {
1714                 int i = 0;
1715
1716                 /* cache the mc-list in adapter */
1717                 netdev_for_each_mc_addr(ha, netdev) {
1718                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1719                         i++;
1720                 }
1721                 adapter->mc_count = netdev_mc_count(netdev);
1722         }
1723         netif_addr_unlock_bh(netdev);
1724
1725         if (mc_promisc) {
1726                 be_set_mc_promisc(adapter);
1727         } else if (adapter->update_mc_list) {
1728                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1729                 if (!status)
1730                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1731                 else
1732                         be_set_mc_promisc(adapter);
1733
1734                 adapter->update_mc_list = false;
1735         }
1736 }
1737
1738 static void be_clear_mc_list(struct be_adapter *adapter)
1739 {
1740         struct net_device *netdev = adapter->netdev;
1741
1742         __dev_mc_unsync(netdev, NULL);
1743         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1744         adapter->mc_count = 0;
1745 }
1746
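/* Add the unicast MAC at uc_list[uc_idx] to the interface. If it equals
 * the primary MAC (dev_mac), reuse pmac_id[0] instead of consuming
 * another MAC-filter entry.
 */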
1747 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1748 {
1749         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1750                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1751                 return 0;
1752         }
1753
1754         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1755                                adapter->if_handle,
1756                                &adapter->pmac_id[uc_idx + 1], 0);
1757 }
1758
1759 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1760 {
1761         if (pmac_id == adapter->pmac_id[0])
1762                 return;
1763
1764         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1765 }
1766
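/* Program the unicast filter list. Fall back to promiscuous mode when the
 * synced uc-list exceeds what the interface supports; otherwise delete the
 * previously programmed entries and re-add the current list.
 */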
1767 static void be_set_uc_list(struct be_adapter *adapter)
1768 {
1769         struct net_device *netdev = adapter->netdev;
1770         struct netdev_hw_addr *ha;
1771         bool uc_promisc = false;
1772         int curr_uc_macs = 0, i;
1773
1774         netif_addr_lock_bh(netdev);
1775         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1776
1777         if (netdev->flags & IFF_PROMISC) {
1778                 adapter->update_uc_list = false;
1779         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1780                 uc_promisc = true;
1781                 adapter->update_uc_list = false;
1782         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1783                 /* Update uc-list unconditionally if the iface was previously
1784                  * in uc-promisc mode and now is out of that mode.
1785                  */
1786                 adapter->update_uc_list = true;
1787         }
1788
1789         if (adapter->update_uc_list) {
1790                 /* cache the uc-list in adapter array */
1791                 i = 0;
1792                 netdev_for_each_uc_addr(ha, netdev) {
1793                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1794                         i++;
1795                 }
1796                 curr_uc_macs = netdev_uc_count(netdev);
1797         }
1798         netif_addr_unlock_bh(netdev);
1799
1800         if (uc_promisc) {
1801                 be_set_uc_promisc(adapter);
1802         } else if (adapter->update_uc_list) {
1803                 be_clear_uc_promisc(adapter);
1804
1805                 for (i = 0; i < adapter->uc_macs; i++)
1806                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1807
1808                 for (i = 0; i < curr_uc_macs; i++)
1809                         be_uc_mac_add(adapter, i);
1810                 adapter->uc_macs = curr_uc_macs;
1811                 adapter->update_uc_list = false;
1812         }
1813 }
1814
1815 static void be_clear_uc_list(struct be_adapter *adapter)
1816 {
1817         struct net_device *netdev = adapter->netdev;
1818         int i;
1819
1820         __dev_uc_unsync(netdev, NULL);
1821         for (i = 0; i < adapter->uc_macs; i++)
1822                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1823
1824         adapter->uc_macs = 0;
1825 }
1826
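/* Re-program RX filtering (promiscuous state, VLANs, uc/mc lists) to match
 * the current netdev flags. Runs under rx_filter_lock.
 */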
1827 static void __be_set_rx_mode(struct be_adapter *adapter)
1828 {
1829         struct net_device *netdev = adapter->netdev;
1830
1831         mutex_lock(&adapter->rx_filter_lock);
1832
1833         if (netdev->flags & IFF_PROMISC) {
1834                 if (!be_in_all_promisc(adapter))
1835                         be_set_all_promisc(adapter);
1836         } else if (be_in_all_promisc(adapter)) {
1837                 /* We need to re-program the vlan-list or clear
1838                  * vlan-promisc mode (if needed) when the interface
1839                  * comes out of promisc mode.
1840                  */
1841                 be_vid_config(adapter);
1842         }
1843
1844         be_set_uc_list(adapter);
1845         be_set_mc_list(adapter);
1846
1847         mutex_unlock(&adapter->rx_filter_lock);
1848 }
1849
1850 static void be_work_set_rx_mode(struct work_struct *work)
1851 {
1852         struct be_cmd_work *cmd_work =
1853                                 container_of(work, struct be_cmd_work, work);
1854
1855         __be_set_rx_mode(cmd_work->adapter);
1856         kfree(cmd_work);
1857 }
1858
1859 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1860 {
1861         struct be_adapter *adapter = netdev_priv(netdev);
1862         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1863         int status;
1864
1865         if (!sriov_enabled(adapter))
1866                 return -EPERM;
1867
1868         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1869                 return -EINVAL;
1870
1871         /* Proceed further only if the user-provided MAC is different
1872          * from the active MAC
1873          */
1874         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1875                 return 0;
1876
1877         if (BEx_chip(adapter)) {
1878                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1879                                 vf + 1);
1880
1881                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1882                                          &vf_cfg->pmac_id, vf + 1);
1883         } else {
1884                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1885                                         vf + 1);
1886         }
1887
1888         if (status) {
1889                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1890                         mac, vf, status);
1891                 return be_cmd_status(status);
1892         }
1893
1894         ether_addr_copy(vf_cfg->mac_addr, mac);
1895
1896         return 0;
1897 }
1898
1899 static int be_get_vf_config(struct net_device *netdev, int vf,
1900                             struct ifla_vf_info *vi)
1901 {
1902         struct be_adapter *adapter = netdev_priv(netdev);
1903         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1904
1905         if (!sriov_enabled(adapter))
1906                 return -EPERM;
1907
1908         if (vf >= adapter->num_vfs)
1909                 return -EINVAL;
1910
1911         vi->vf = vf;
1912         vi->max_tx_rate = vf_cfg->tx_rate;
1913         vi->min_tx_rate = 0;
1914         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1915         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1916         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1917         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1918         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1919
1920         return 0;
1921 }
1922
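/* Enable Transparent VLAN Tagging (TVT) for a VF: program the given vlan
 * in the host switch config, clear any guest-programmed VLAN filters and
 * revoke the VF's FILTMGMT privilege so it cannot program VLAN filters
 * while TVT is active.
 */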
1923 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1924 {
1925         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1926         u16 vids[BE_NUM_VLANS_SUPPORTED];
1927         int vf_if_id = vf_cfg->if_handle;
1928         int status;
1929
1930         /* Enable Transparent VLAN Tagging */
1931         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1932         if (status)
1933                 return status;
1934
1935         /* Clear any pre-programmed VLAN filters on the VF, now that TVT is enabled */
1936         vids[0] = 0;
1937         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1938         if (!status)
1939                 dev_info(&adapter->pdev->dev,
1940                          "Cleared guest VLANs on VF%d", vf);
1941
1942         /* After TVT is enabled, disallow VFs to program VLAN filters */
1943         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1944                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1945                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1946                 if (!status)
1947                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1948         }
1949         return 0;
1950 }
1951
1952 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1953 {
1954         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1955         struct device *dev = &adapter->pdev->dev;
1956         int status;
1957
1958         /* Reset Transparent VLAN Tagging. */
1959         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1960                                        vf_cfg->if_handle, 0, 0);
1961         if (status)
1962                 return status;
1963
1964         /* Allow VFs to program VLAN filtering */
1965         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1966                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1967                                                   BE_PRIV_FILTMGMT, vf + 1);
1968                 if (!status) {
1969                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1970                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1971                 }
1972         }
1973
1974         dev_info(dev,
1975                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1976         return 0;
1977 }
1978
1979 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1980                           __be16 vlan_proto)
1981 {
1982         struct be_adapter *adapter = netdev_priv(netdev);
1983         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1984         int status;
1985
1986         if (!sriov_enabled(adapter))
1987                 return -EPERM;
1988
1989         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1990                 return -EINVAL;
1991
1992         if (vlan_proto != htons(ETH_P_8021Q))
1993                 return -EPROTONOSUPPORT;
1994
1995         if (vlan || qos) {
1996                 vlan |= qos << VLAN_PRIO_SHIFT;
1997                 status = be_set_vf_tvt(adapter, vf, vlan);
1998         } else {
1999                 status = be_clear_vf_tvt(adapter, vf);
2000         }
2001
2002         if (status) {
2003                 dev_err(&adapter->pdev->dev,
2004                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2005                         status);
2006                 return be_cmd_status(status);
2007         }
2008
2009         vf_cfg->vlan_tag = vlan;
2010         return 0;
2011 }
2012
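/* ndo_set_vf_rate handler. Only max_tx_rate is supported; it must lie
 * between 100 Mbps and the current link speed and, on Skyhawk, be a
 * multiple of 1% of the link speed. A value of 0 removes the rate limit.
 */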
2013 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2014                              int min_tx_rate, int max_tx_rate)
2015 {
2016         struct be_adapter *adapter = netdev_priv(netdev);
2017         struct device *dev = &adapter->pdev->dev;
2018         int percent_rate, status = 0;
2019         u16 link_speed = 0;
2020         u8 link_status;
2021
2022         if (!sriov_enabled(adapter))
2023                 return -EPERM;
2024
2025         if (vf >= adapter->num_vfs)
2026                 return -EINVAL;
2027
2028         if (min_tx_rate)
2029                 return -EINVAL;
2030
2031         if (!max_tx_rate)
2032                 goto config_qos;
2033
2034         status = be_cmd_link_status_query(adapter, &link_speed,
2035                                           &link_status, 0);
2036         if (status)
2037                 goto err;
2038
2039         if (!link_status) {
2040                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2041                 status = -ENETDOWN;
2042                 goto err;
2043         }
2044
2045         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2046                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2047                         link_speed);
2048                 status = -EINVAL;
2049                 goto err;
2050         }
2051
2052         /* On Skyhawk the QOS setting must be done only as a % value */
2053         percent_rate = link_speed / 100;
2054         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2055                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2056                         percent_rate);
2057                 status = -EINVAL;
2058                 goto err;
2059         }
2060
2061 config_qos:
2062         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2063         if (status)
2064                 goto err;
2065
2066         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2067         return 0;
2068
2069 err:
2070         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2071                 max_tx_rate, vf);
2072         return be_cmd_status(status);
2073 }
2074
2075 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2076                                 int link_state)
2077 {
2078         struct be_adapter *adapter = netdev_priv(netdev);
2079         int status;
2080
2081         if (!sriov_enabled(adapter))
2082                 return -EPERM;
2083
2084         if (vf >= adapter->num_vfs)
2085                 return -EINVAL;
2086
2087         status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2088         if (status) {
2089                 dev_err(&adapter->pdev->dev,
2090                         "Link state change on VF %d failed: %#x\n", vf, status);
2091                 return be_cmd_status(status);
2092         }
2093
2094         adapter->vf_cfg[vf].plink_tracking = link_state;
2095
2096         return 0;
2097 }
2098
2099 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2100 {
2101         struct be_adapter *adapter = netdev_priv(netdev);
2102         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2103         u8 spoofchk;
2104         int status;
2105
2106         if (!sriov_enabled(adapter))
2107                 return -EPERM;
2108
2109         if (vf >= adapter->num_vfs)
2110                 return -EINVAL;
2111
2112         if (BEx_chip(adapter))
2113                 return -EOPNOTSUPP;
2114
2115         if (enable == vf_cfg->spoofchk)
2116                 return 0;
2117
2118         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2119
2120         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2121                                        0, spoofchk);
2122         if (status) {
2123                 dev_err(&adapter->pdev->dev,
2124                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2125                 return be_cmd_status(status);
2126         }
2127
2128         vf_cfg->spoofchk = enable;
2129         return 0;
2130 }
2131
2132 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2133                           ulong now)
2134 {
2135         aic->rx_pkts_prev = rx_pkts;
2136         aic->tx_reqs_prev = tx_pkts;
2137         aic->jiffies = now;
2138 }
2139
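/* Compute a new EQ-delay (interrupt coalescing) value for this EQ from the
 * rx/tx packet rate seen since the last sample. Returns the statically
 * configured value (et_eqd) when adaptive coalescing is disabled, and the
 * previous value when the sample window is too small or counters wrapped.
 */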
2140 static int be_get_new_eqd(struct be_eq_obj *eqo)
2141 {
2142         struct be_adapter *adapter = eqo->adapter;
2143         int eqd, start;
2144         struct be_aic_obj *aic;
2145         struct be_rx_obj *rxo;
2146         struct be_tx_obj *txo;
2147         u64 rx_pkts = 0, tx_pkts = 0;
2148         ulong now;
2149         u32 pps, delta;
2150         int i;
2151
2152         aic = &adapter->aic_obj[eqo->idx];
2153         if (!aic->enable) {
2154                 if (aic->jiffies)
2155                         aic->jiffies = 0;
2156                 eqd = aic->et_eqd;
2157                 return eqd;
2158         }
2159
2160         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2161                 do {
2162                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2163                         rx_pkts += rxo->stats.rx_pkts;
2164                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2165         }
2166
2167         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2168                 do {
2169                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2170                         tx_pkts += txo->stats.tx_reqs;
2171                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2172         }
2173
2174         /* Skip if this is the first calculation, or if jiffies/pkt counters wrapped */
2175         now = jiffies;
2176         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2177             rx_pkts < aic->rx_pkts_prev ||
2178             tx_pkts < aic->tx_reqs_prev) {
2179                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2180                 return aic->prev_eqd;
2181         }
2182
2183         delta = jiffies_to_msecs(now - aic->jiffies);
2184         if (delta == 0)
2185                 return aic->prev_eqd;
2186
2187         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2188                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2189         eqd = (pps / 15000) << 2;
2190
2191         if (eqd < 8)
2192                 eqd = 0;
2193         eqd = min_t(u32, eqd, aic->max_eqd);
2194         eqd = max_t(u32, eqd, aic->min_eqd);
2195
2196         be_aic_update(aic, rx_pkts, tx_pkts, now);
2197
2198         return eqd;
2199 }
2200
2201 /* For Skyhawk-R only */
2202 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2203 {
2204         struct be_adapter *adapter = eqo->adapter;
2205         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2206         ulong now = jiffies;
2207         int eqd;
2208         u32 mult_enc;
2209
2210         if (!aic->enable)
2211                 return 0;
2212
2213         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2214                 eqd = aic->prev_eqd;
2215         else
2216                 eqd = be_get_new_eqd(eqo);
2217
2218         if (eqd > 100)
2219                 mult_enc = R2I_DLY_ENC_1;
2220         else if (eqd > 60)
2221                 mult_enc = R2I_DLY_ENC_2;
2222         else if (eqd > 20)
2223                 mult_enc = R2I_DLY_ENC_3;
2224         else
2225                 mult_enc = R2I_DLY_ENC_0;
2226
2227         aic->prev_eqd = eqd;
2228
2229         return mult_enc;
2230 }
2231
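/* Recompute the EQ delay for every event queue and issue a modify-EQD
 * command for those whose value changed (or for all of them when
 * force_update is set).
 */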
2232 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2233 {
2234         struct be_set_eqd set_eqd[MAX_EVT_QS];
2235         struct be_aic_obj *aic;
2236         struct be_eq_obj *eqo;
2237         int i, num = 0, eqd;
2238
2239         for_all_evt_queues(adapter, eqo, i) {
2240                 aic = &adapter->aic_obj[eqo->idx];
2241                 eqd = be_get_new_eqd(eqo);
2242                 if (force_update || eqd != aic->prev_eqd) {
2243                         set_eqd[num].delay_multiplier = (eqd * 65) / 100;
2244                         set_eqd[num].eq_id = eqo->q.id;
2245                         aic->prev_eqd = eqd;
2246                         num++;
2247                 }
2248         }
2249
2250         if (num)
2251                 be_cmd_modify_eqd(adapter, set_eqd, num);
2252 }
2253
2254 static void be_rx_stats_update(struct be_rx_obj *rxo,
2255                                struct be_rx_compl_info *rxcp)
2256 {
2257         struct be_rx_stats *stats = rx_stats(rxo);
2258
2259         u64_stats_update_begin(&stats->sync);
2260         stats->rx_compl++;
2261         stats->rx_bytes += rxcp->pkt_size;
2262         stats->rx_pkts++;
2263         if (rxcp->tunneled)
2264                 stats->rx_vxlan_offload_pkts++;
2265         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2266                 stats->rx_mcast_pkts++;
2267         if (rxcp->err)
2268                 stats->rx_compl_err++;
2269         u64_stats_update_end(&stats->sync);
2270 }
2271
2272 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2273 {
2274         /* L4 checksum is not reliable for non TCP/UDP packets.
2275          * Also ignore ipcksm for ipv6 pkts
2276          */
2277         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2278                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2279 }
2280
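/* Pop the page_info at the RXQ tail. The last fragment of a big page is
 * DMA-unmapped in full; intermediate fragments are only synced for CPU
 * access as the page is still mapped.
 */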
2281 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2282 {
2283         struct be_adapter *adapter = rxo->adapter;
2284         struct be_rx_page_info *rx_page_info;
2285         struct be_queue_info *rxq = &rxo->q;
2286         u32 frag_idx = rxq->tail;
2287
2288         rx_page_info = &rxo->page_info_tbl[frag_idx];
2289         BUG_ON(!rx_page_info->page);
2290
2291         if (rx_page_info->last_frag) {
2292                 dma_unmap_page(&adapter->pdev->dev,
2293                                dma_unmap_addr(rx_page_info, bus),
2294                                adapter->big_page_size, DMA_FROM_DEVICE);
2295                 rx_page_info->last_frag = false;
2296         } else {
2297                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2298                                         dma_unmap_addr(rx_page_info, bus),
2299                                         rx_frag_size, DMA_FROM_DEVICE);
2300         }
2301
2302         queue_tail_inc(rxq);
2303         atomic_dec(&rxq->used);
2304         return rx_page_info;
2305 }
2306
2307 /* Throw away the data in the Rx completion */
2308 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2309                                 struct be_rx_compl_info *rxcp)
2310 {
2311         struct be_rx_page_info *page_info;
2312         u16 i, num_rcvd = rxcp->num_rcvd;
2313
2314         for (i = 0; i < num_rcvd; i++) {
2315                 page_info = get_rx_page_info(rxo);
2316                 put_page(page_info->page);
2317                 memset(page_info, 0, sizeof(*page_info));
2318         }
2319 }
2320
2321 /*
2322  * skb_fill_rx_data forms a complete skb for an ether frame
2323  * indicated by rxcp.
2324  */
2325 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2326                              struct be_rx_compl_info *rxcp)
2327 {
2328         struct be_rx_page_info *page_info;
2329         u16 i, j;
2330         u16 hdr_len, curr_frag_len, remaining;
2331         u8 *start;
2332
2333         page_info = get_rx_page_info(rxo);
2334         start = page_address(page_info->page) + page_info->page_offset;
2335         prefetch(start);
2336
2337         /* Copy data in the first descriptor of this completion */
2338         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2339
2340         skb->len = curr_frag_len;
2341         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2342                 memcpy(skb->data, start, curr_frag_len);
2343                 /* Complete packet has now been moved to data */
2344                 put_page(page_info->page);
2345                 skb->data_len = 0;
2346                 skb->tail += curr_frag_len;
2347         } else {
2348                 hdr_len = ETH_HLEN;
2349                 memcpy(skb->data, start, hdr_len);
2350                 skb_shinfo(skb)->nr_frags = 1;
2351                 skb_frag_set_page(skb, 0, page_info->page);
2352                 skb_shinfo(skb)->frags[0].page_offset =
2353                                         page_info->page_offset + hdr_len;
2354                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2355                                   curr_frag_len - hdr_len);
2356                 skb->data_len = curr_frag_len - hdr_len;
2357                 skb->truesize += rx_frag_size;
2358                 skb->tail += hdr_len;
2359         }
2360         page_info->page = NULL;
2361
2362         if (rxcp->pkt_size <= rx_frag_size) {
2363                 BUG_ON(rxcp->num_rcvd != 1);
2364                 return;
2365         }
2366
2367         /* More frags present for this completion */
2368         remaining = rxcp->pkt_size - curr_frag_len;
2369         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2370                 page_info = get_rx_page_info(rxo);
2371                 curr_frag_len = min(remaining, rx_frag_size);
2372
2373                 /* Coalesce all frags from the same physical page in one slot */
2374                 if (page_info->page_offset == 0) {
2375                         /* Fresh page */
2376                         j++;
2377                         skb_frag_set_page(skb, j, page_info->page);
2378                         skb_shinfo(skb)->frags[j].page_offset =
2379                                                         page_info->page_offset;
2380                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2381                         skb_shinfo(skb)->nr_frags++;
2382                 } else {
2383                         put_page(page_info->page);
2384                 }
2385
2386                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2387                 skb->len += curr_frag_len;
2388                 skb->data_len += curr_frag_len;
2389                 skb->truesize += rx_frag_size;
2390                 remaining -= curr_frag_len;
2391                 page_info->page = NULL;
2392         }
2393         BUG_ON(j > MAX_SKB_FRAGS);
2394 }
2395
2396 /* Process the RX completion indicated by rxcp when GRO is disabled */
2397 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2398                                 struct be_rx_compl_info *rxcp)
2399 {
2400         struct be_adapter *adapter = rxo->adapter;
2401         struct net_device *netdev = adapter->netdev;
2402         struct sk_buff *skb;
2403
2404         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2405         if (unlikely(!skb)) {
2406                 rx_stats(rxo)->rx_drops_no_skbs++;
2407                 be_rx_compl_discard(rxo, rxcp);
2408                 return;
2409         }
2410
2411         skb_fill_rx_data(rxo, skb, rxcp);
2412
2413         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2414                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2415         else
2416                 skb_checksum_none_assert(skb);
2417
2418         skb->protocol = eth_type_trans(skb, netdev);
2419         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2420         if (netdev->features & NETIF_F_RXHASH)
2421                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2422
2423         skb->csum_level = rxcp->tunneled;
2424         skb_mark_napi_id(skb, napi);
2425
2426         if (rxcp->vlanf)
2427                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2428
2429         netif_receive_skb(skb);
2430 }
2431
2432 /* Process the RX completion indicated by rxcp when GRO is enabled */
2433 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2434                                     struct napi_struct *napi,
2435                                     struct be_rx_compl_info *rxcp)
2436 {
2437         struct be_adapter *adapter = rxo->adapter;
2438         struct be_rx_page_info *page_info;
2439         struct sk_buff *skb = NULL;
2440         u16 remaining, curr_frag_len;
2441         u16 i, j;
2442
2443         skb = napi_get_frags(napi);
2444         if (!skb) {
2445                 be_rx_compl_discard(rxo, rxcp);
2446                 return;
2447         }
2448
2449         remaining = rxcp->pkt_size;
2450         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2451                 page_info = get_rx_page_info(rxo);
2452
2453                 curr_frag_len = min(remaining, rx_frag_size);
2454
2455                 /* Coalesce all frags from the same physical page in one slot */
2456                 if (i == 0 || page_info->page_offset == 0) {
2457                         /* First frag or Fresh page */
2458                         j++;
2459                         skb_frag_set_page(skb, j, page_info->page);
2460                         skb_shinfo(skb)->frags[j].page_offset =
2461                                                         page_info->page_offset;
2462                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2463                 } else {
2464                         put_page(page_info->page);
2465                 }
2466                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2467                 skb->truesize += rx_frag_size;
2468                 remaining -= curr_frag_len;
2469                 memset(page_info, 0, sizeof(*page_info));
2470         }
2471         BUG_ON(j > MAX_SKB_FRAGS);
2472
2473         skb_shinfo(skb)->nr_frags = j + 1;
2474         skb->len = rxcp->pkt_size;
2475         skb->data_len = rxcp->pkt_size;
2476         skb->ip_summed = CHECKSUM_UNNECESSARY;
2477         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2478         if (adapter->netdev->features & NETIF_F_RXHASH)
2479                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2480
2481         skb->csum_level = rxcp->tunneled;
2482
2483         if (rxcp->vlanf)
2484                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2485
2486         napi_gro_frags(napi);
2487 }
2488
2489 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2490                                  struct be_rx_compl_info *rxcp)
2491 {
2492         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2493         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2494         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2495         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2496         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2497         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2498         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2499         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2500         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2501         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2502         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2503         if (rxcp->vlanf) {
2504                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2505                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2506         }
2507         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2508         rxcp->tunneled =
2509                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2510 }
2511
2512 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2513                                  struct be_rx_compl_info *rxcp)
2514 {
2515         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2516         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2517         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2518         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2519         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2520         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2521         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2522         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2523         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2524         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2525         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2526         if (rxcp->vlanf) {
2527                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2528                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2529         }
2530         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2531         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2532 }
2533
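/* Fetch the next valid RX completion from the CQ, if any. Parses it into
 * rxo->rxcp (v1 or v0 layout depending on be3_native), fixes up the vlan
 * fields for QnQ/pvid cases and invalidates the CQ entry before returning.
 */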
2534 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2535 {
2536         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2537         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2538         struct be_adapter *adapter = rxo->adapter;
2539
2540         /* For checking the valid bit it is Ok to use either definition as the
2541          * valid bit is at the same position in both v0 and v1 Rx compl */
2542         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2543                 return NULL;
2544
2545         rmb();
2546         be_dws_le_to_cpu(compl, sizeof(*compl));
2547
2548         if (adapter->be3_native)
2549                 be_parse_rx_compl_v1(compl, rxcp);
2550         else
2551                 be_parse_rx_compl_v0(compl, rxcp);
2552
2553         if (rxcp->ip_frag)
2554                 rxcp->l4_csum = 0;
2555
2556         if (rxcp->vlanf) {
2557                 /* In QNQ modes, if qnq bit is not set, then the packet was
2558                  * tagged only with the transparent outer vlan-tag and must
2559                  * not be treated as a vlan packet by host
2560                  */
2561                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2562                         rxcp->vlanf = 0;
2563
2564                 if (!lancer_chip(adapter))
2565                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2566
2567                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2568                     !test_bit(rxcp->vlan_tag, adapter->vids))
2569                         rxcp->vlanf = 0;
2570         }
2571
2572         /* As the compl has been parsed, reset it; we won't touch it again */
2573         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2574
2575         queue_tail_inc(&rxo->cq);
2576         return rxcp;
2577 }
2578
2579 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2580 {
2581         u32 order = get_order(size);
2582
2583         if (order > 0)
2584                 gfp |= __GFP_COMP;
2585         return  alloc_pages(gfp, order);
2586 }
2587
2588 /*
2589  * Allocate a page, split it to fragments of size rx_frag_size and post as
2590  * receive buffers to BE
2591  */
2592 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2593 {
2594         struct be_adapter *adapter = rxo->adapter;
2595         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2596         struct be_queue_info *rxq = &rxo->q;
2597         struct page *pagep = NULL;
2598         struct device *dev = &adapter->pdev->dev;
2599         struct be_eth_rx_d *rxd;
2600         u64 page_dmaaddr = 0, frag_dmaaddr;
2601         u32 posted, page_offset = 0, notify = 0;
2602
2603         page_info = &rxo->page_info_tbl[rxq->head];
2604         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2605                 if (!pagep) {
2606                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2607                         if (unlikely(!pagep)) {
2608                                 rx_stats(rxo)->rx_post_fail++;
2609                                 break;
2610                         }
2611                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2612                                                     adapter->big_page_size,
2613                                                     DMA_FROM_DEVICE);
2614                         if (dma_mapping_error(dev, page_dmaaddr)) {
2615                                 put_page(pagep);
2616                                 pagep = NULL;
2617                                 adapter->drv_stats.dma_map_errors++;
2618                                 break;
2619                         }
2620                         page_offset = 0;
2621                 } else {
2622                         get_page(pagep);
2623                         page_offset += rx_frag_size;
2624                 }
2625                 page_info->page_offset = page_offset;
2626                 page_info->page = pagep;
2627
2628                 rxd = queue_head_node(rxq);
2629                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2630                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2631                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2632
2633                 /* Any space left in the current big page for another frag? */
2634                 if ((page_offset + rx_frag_size + rx_frag_size) >
2635                                         adapter->big_page_size) {
2636                         pagep = NULL;
2637                         page_info->last_frag = true;
2638                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2639                 } else {
2640                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2641                 }
2642
2643                 prev_page_info = page_info;
2644                 queue_head_inc(rxq);
2645                 page_info = &rxo->page_info_tbl[rxq->head];
2646         }
2647
2648         /* Mark the last frag of a page when we break out of the above loop
2649          * with no more slots available in the RXQ
2650          */
2651         if (pagep) {
2652                 prev_page_info->last_frag = true;
2653                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2654         }
2655
2656         if (posted) {
2657                 atomic_add(posted, &rxq->used);
2658                 if (rxo->rx_post_starved)
2659                         rxo->rx_post_starved = false;
2660                 do {
2661                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2662                         be_rxq_notify(adapter, rxq->id, notify);
2663                         posted -= notify;
2664                 } while (posted);
2665         } else if (atomic_read(&rxq->used) == 0) {
2666                 /* Let be_worker replenish when memory is available */
2667                 rxo->rx_post_starved = true;
2668         }
2669 }
2670
2671 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2672 {
2673         switch (status) {
2674         case BE_TX_COMP_HDR_PARSE_ERR:
2675                 tx_stats(txo)->tx_hdr_parse_err++;
2676                 break;
2677         case BE_TX_COMP_NDMA_ERR:
2678                 tx_stats(txo)->tx_dma_err++;
2679                 break;
2680         case BE_TX_COMP_ACL_ERR:
2681                 tx_stats(txo)->tx_spoof_check_err++;
2682                 break;
2683         }
2684 }
2685
2686 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2687 {
2688         switch (status) {
2689         case LANCER_TX_COMP_LSO_ERR:
2690                 tx_stats(txo)->tx_tso_err++;
2691                 break;
2692         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2693         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2694                 tx_stats(txo)->tx_spoof_check_err++;
2695                 break;
2696         case LANCER_TX_COMP_QINQ_ERR:
2697                 tx_stats(txo)->tx_qinq_err++;
2698                 break;
2699         case LANCER_TX_COMP_PARITY_ERR:
2700                 tx_stats(txo)->tx_internal_parity_err++;
2701                 break;
2702         case LANCER_TX_COMP_DMA_ERR:
2703                 tx_stats(txo)->tx_dma_err++;
2704                 break;
2705         case LANCER_TX_COMP_SGE_ERR:
2706                 tx_stats(txo)->tx_sge_err++;
2707                 break;
2708         }
2709 }
2710
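/* Fetch the next valid TX completion, if any. TX errors are accounted per
 * queue; on Lancer, LSO/parity/SGE errors additionally mark the adapter
 * with BE_ERROR_TX, after which no further completions are returned.
 */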
2711 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2712                                                 struct be_tx_obj *txo)
2713 {
2714         struct be_queue_info *tx_cq = &txo->cq;
2715         struct be_tx_compl_info *txcp = &txo->txcp;
2716         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2717
2718         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2719                 return NULL;
2720
2721         /* Ensure load ordering of valid bit dword and other dwords below */
2722         rmb();
2723         be_dws_le_to_cpu(compl, sizeof(*compl));
2724
2725         txcp->status = GET_TX_COMPL_BITS(status, compl);
2726         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2727
2728         if (txcp->status) {
2729                 if (lancer_chip(adapter)) {
2730                         lancer_update_tx_err(txo, txcp->status);
2731                         /* Reset the adapter in case of TSO,
2732                          * SGE or parity errors
2733                          */
2734                         if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2735                             txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2736                             txcp->status == LANCER_TX_COMP_SGE_ERR)
2737                                 be_set_error(adapter, BE_ERROR_TX);
2738                 } else {
2739                         be_update_tx_err(txo, txcp->status);
2740                 }
2741         }
2742
2743         if (be_check_error(adapter, BE_ERROR_TX))
2744                 return NULL;
2745
2746         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2747         queue_tail_inc(tx_cq);
2748         return txcp;
2749 }
2750
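/* Walk the TX queue from its tail up to last_index, unmapping every WRB
 * and freeing the completed skbs. Returns the number of WRBs processed.
 */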
2751 static u16 be_tx_compl_process(struct be_adapter *adapter,
2752                                struct be_tx_obj *txo, u16 last_index)
2753 {
2754         struct sk_buff **sent_skbs = txo->sent_skb_list;
2755         struct be_queue_info *txq = &txo->q;
2756         struct sk_buff *skb = NULL;
2757         bool unmap_skb_hdr = false;
2758         struct be_eth_wrb *wrb;
2759         u16 num_wrbs = 0;
2760         u32 frag_index;
2761
2762         do {
2763                 if (sent_skbs[txq->tail]) {
2764                         /* Free skb from prev req */
2765                         if (skb)
2766                                 dev_consume_skb_any(skb);
2767                         skb = sent_skbs[txq->tail];
2768                         sent_skbs[txq->tail] = NULL;
2769                         queue_tail_inc(txq);  /* skip hdr wrb */
2770                         num_wrbs++;
2771                         unmap_skb_hdr = true;
2772                 }
2773                 wrb = queue_tail_node(txq);
2774                 frag_index = txq->tail;
2775                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2776                               (unmap_skb_hdr && skb_headlen(skb)));
2777                 unmap_skb_hdr = false;
2778                 queue_tail_inc(txq);
2779                 num_wrbs++;
2780         } while (frag_index != last_index);
2781         dev_consume_skb_any(skb);
2782
2783         return num_wrbs;
2784 }
2785
2786 /* Return the number of events in the event queue */
2787 static inline int events_get(struct be_eq_obj *eqo)
2788 {
2789         struct be_eq_entry *eqe;
2790         int num = 0;
2791
2792         do {
2793                 eqe = queue_tail_node(&eqo->q);
2794                 if (eqe->evt == 0)
2795                         break;
2796
2797                 rmb();
2798                 eqe->evt = 0;
2799                 num++;
2800                 queue_tail_inc(&eqo->q);
2801         } while (true);
2802
2803         return num;
2804 }
2805
2806 /* Leaves the EQ in disarmed state */
2807 static void be_eq_clean(struct be_eq_obj *eqo)
2808 {
2809         int num = events_get(eqo);
2810
2811         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2812 }
2813
2814 /* Free posted rx buffers that were not used */
2815 static void be_rxq_clean(struct be_rx_obj *rxo)
2816 {
2817         struct be_queue_info *rxq = &rxo->q;
2818         struct be_rx_page_info *page_info;
2819
2820         while (atomic_read(&rxq->used) > 0) {
2821                 page_info = get_rx_page_info(rxo);
2822                 put_page(page_info->page);
2823                 memset(page_info, 0, sizeof(*page_info));
2824         }
2825         BUG_ON(atomic_read(&rxq->used));
2826         rxq->tail = 0;
2827         rxq->head = 0;
2828 }
2829
2830 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2831 {
2832         struct be_queue_info *rx_cq = &rxo->cq;
2833         struct be_rx_compl_info *rxcp;
2834         struct be_adapter *adapter = rxo->adapter;
2835         int flush_wait = 0;
2836
2837         /* Consume pending rx completions.
2838          * Wait for the flush completion (identified by zero num_rcvd)
2839          * to arrive. Notify CQ even when there are no more CQ entries
2840          * for HW to flush partially coalesced CQ entries.
2841          * In Lancer, there is no need to wait for flush compl.
2842          */
2843         for (;;) {
2844                 rxcp = be_rx_compl_get(rxo);
2845                 if (!rxcp) {
2846                         if (lancer_chip(adapter))
2847                                 break;
2848
2849                         if (flush_wait++ > 50 ||
2850                             be_check_error(adapter,
2851                                            BE_ERROR_HW)) {
2852                                 dev_warn(&adapter->pdev->dev,
2853                                          "did not receive flush compl\n");
2854                                 break;
2855                         }
2856                         be_cq_notify(adapter, rx_cq->id, true, 0);
2857                         mdelay(1);
2858                 } else {
2859                         be_rx_compl_discard(rxo, rxcp);
2860                         be_cq_notify(adapter, rx_cq->id, false, 1);
2861                         if (rxcp->num_rcvd == 0)
2862                                 break;
2863                 }
2864         }
2865
2866         /* After cleanup, leave the CQ in unarmed state */
2867         be_cq_notify(adapter, rx_cq->id, false, 0);
2868 }
2869
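/* Drain all TX completion queues before teardown: keep processing
 * completions until the HW has been silent for ~10ms (or an HW error is
 * detected), then reclaim any WRBs that were queued but never notified
 * to the HW and reset the TXQ indices accordingly.
 */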
2870 static void be_tx_compl_clean(struct be_adapter *adapter)
2871 {
2872         struct device *dev = &adapter->pdev->dev;
2873         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2874         struct be_tx_compl_info *txcp;
2875         struct be_queue_info *txq;
2876         u32 end_idx, notified_idx;
2877         struct be_tx_obj *txo;
2878         int i, pending_txqs;
2879
2880         /* Stop polling for compls when HW has been silent for 10ms */
2881         do {
2882                 pending_txqs = adapter->num_tx_qs;
2883
2884                 for_all_tx_queues(adapter, txo, i) {
2885                         cmpl = 0;
2886                         num_wrbs = 0;
2887                         txq = &txo->q;
2888                         while ((txcp = be_tx_compl_get(adapter, txo))) {
2889                                 num_wrbs +=
2890                                         be_tx_compl_process(adapter, txo,
2891                                                             txcp->end_index);
2892                                 cmpl++;
2893                         }
2894                         if (cmpl) {
2895                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2896                                 atomic_sub(num_wrbs, &txq->used);
2897                                 timeo = 0;
2898                         }
2899                         if (!be_is_tx_compl_pending(txo))
2900                                 pending_txqs--;
2901                 }
2902
2903                 if (pending_txqs == 0 || ++timeo > 10 ||
2904                     be_check_error(adapter, BE_ERROR_HW))
2905                         break;
2906
2907                 mdelay(1);
2908         } while (true);
2909
2910         /* Free enqueued TX that was never notified to HW */
2911         for_all_tx_queues(adapter, txo, i) {
2912                 txq = &txo->q;
2913
2914                 if (atomic_read(&txq->used)) {
2915                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2916                                  i, atomic_read(&txq->used));
2917                         notified_idx = txq->tail;
2918                         end_idx = txq->tail;
2919                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2920                                   txq->len);
2921                         /* Use the tx-compl process logic to handle requests
2922                          * that were not sent to the HW.
2923                          */
2924                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2925                         atomic_sub(num_wrbs, &txq->used);
2926                         BUG_ON(atomic_read(&txq->used));
2927                         txo->pend_wrb_cnt = 0;
2928                         /* Since hw was never notified of these requests,
2929                          * reset TXQ indices
2930                          */
2931                         txq->head = notified_idx;
2932                         txq->tail = notified_idx;
2933                 }
2934         }
2935 }
2936
2937 static void be_evt_queues_destroy(struct be_adapter *adapter)
2938 {
2939         struct be_eq_obj *eqo;
2940         int i;
2941
2942         for_all_evt_queues(adapter, eqo, i) {
2943                 if (eqo->q.created) {
2944                         be_eq_clean(eqo);
2945                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2946                         netif_napi_del(&eqo->napi);
2947                         free_cpumask_var(eqo->affinity_mask);
2948                 }
2949                 be_queue_free(adapter, &eqo->q);
2950         }
2951 }
2952
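/* Allocate and create the event queues: one EQ per available IRQ, capped
 * by the larger of the configured RX/TX IRQ counts. Each EQ gets default
 * adaptive-coalescing settings, a NUMA-local CPU in its affinity mask and
 * its own NAPI context.
 */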
2953 static int be_evt_queues_create(struct be_adapter *adapter)
2954 {
2955         struct be_queue_info *eq;
2956         struct be_eq_obj *eqo;
2957         struct be_aic_obj *aic;
2958         int i, rc;
2959
2960         /* need enough EQs to service both RX and TX queues */
2961         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2962                                     max(adapter->cfg_num_rx_irqs,
2963                                         adapter->cfg_num_tx_irqs));
2964
2965         for_all_evt_queues(adapter, eqo, i) {
2966                 int numa_node = dev_to_node(&adapter->pdev->dev);
2967
2968                 aic = &adapter->aic_obj[i];
2969                 eqo->adapter = adapter;
2970                 eqo->idx = i;
2971                 aic->max_eqd = BE_MAX_EQD;
2972                 aic->enable = true;
2973
2974                 eq = &eqo->q;
2975                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2976                                     sizeof(struct be_eq_entry));
2977                 if (rc)
2978                         return rc;
2979
2980                 rc = be_cmd_eq_create(adapter, eqo);
2981                 if (rc)
2982                         return rc;
2983
2984                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2985                         return -ENOMEM;
2986                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2987                                 eqo->affinity_mask);
2988                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2989                                BE_NAPI_WEIGHT);
2990         }
2991         return 0;
2992 }
2993
2994 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2995 {
2996         struct be_queue_info *q;
2997
2998         q = &adapter->mcc_obj.q;
2999         if (q->created)
3000                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
3001         be_queue_free(adapter, q);
3002
3003         q = &adapter->mcc_obj.cq;
3004         if (q->created)
3005                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3006         be_queue_free(adapter, q);
3007 }
3008
3009 /* Must be called only after TX qs are created as MCC shares TX EQ */
3010 static int be_mcc_queues_create(struct be_adapter *adapter)
3011 {
3012         struct be_queue_info *q, *cq;
3013
3014         cq = &adapter->mcc_obj.cq;
3015         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3016                            sizeof(struct be_mcc_compl)))
3017                 goto err;
3018
3019         /* Use the default EQ for MCC completions */
3020         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3021                 goto mcc_cq_free;
3022
3023         q = &adapter->mcc_obj.q;
3024         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3025                 goto mcc_cq_destroy;
3026
3027         if (be_cmd_mccq_create(adapter, q, cq))
3028                 goto mcc_q_free;
3029
3030         return 0;
3031
3032 mcc_q_free:
3033         be_queue_free(adapter, q);
3034 mcc_cq_destroy:
3035         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3036 mcc_cq_free:
3037         be_queue_free(adapter, cq);
3038 err:
3039         return -1;
3040 }
3041
3042 static void be_tx_queues_destroy(struct be_adapter *adapter)
3043 {
3044         struct be_queue_info *q;
3045         struct be_tx_obj *txo;
3046         u8 i;
3047
3048         for_all_tx_queues(adapter, txo, i) {
3049                 q = &txo->q;
3050                 if (q->created)
3051                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3052                 be_queue_free(adapter, q);
3053
3054                 q = &txo->cq;
3055                 if (q->created)
3056                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3057                 be_queue_free(adapter, q);
3058         }
3059 }
3060
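/* Create the TX queues and their completion queues. When there are fewer
 * EQs than TXQs, the TX CQs are distributed round-robin across the EQs,
 * and each queue's XPS map is set to its EQ's CPU affinity mask.
 */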
3061 static int be_tx_qs_create(struct be_adapter *adapter)
3062 {
3063         struct be_queue_info *cq;
3064         struct be_tx_obj *txo;
3065         struct be_eq_obj *eqo;
3066         int status, i;
3067
3068         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3069
3070         for_all_tx_queues(adapter, txo, i) {
3071                 cq = &txo->cq;
3072                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3073                                         sizeof(struct be_eth_tx_compl));
3074                 if (status)
3075                         return status;
3076
3077                 u64_stats_init(&txo->stats.sync);
3078                 u64_stats_init(&txo->stats.sync_compl);
3079
3080                 /* If num_evt_qs is less than num_tx_qs, then more than
3081                  * one txq shares an eq
3082                  */
3083                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3084                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3085                 if (status)
3086                         return status;
3087
3088                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3089                                         sizeof(struct be_eth_wrb));
3090                 if (status)
3091                         return status;
3092
3093                 status = be_cmd_txq_create(adapter, txo);
3094                 if (status)
3095                         return status;
3096
3097                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3098                                     eqo->idx);
3099         }
3100
3101         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3102                  adapter->num_tx_qs);
3103         return 0;
3104 }
3105
3106 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3107 {
3108         struct be_queue_info *q;
3109         struct be_rx_obj *rxo;
3110         int i;
3111
3112         for_all_rx_queues(adapter, rxo, i) {
3113                 q = &rxo->cq;
3114                 if (q->created)
3115                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3116                 be_queue_free(adapter, q);
3117         }
3118 }
3119
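/* Work out how many RX queues to use (RSS rings plus an optional default
 * RXQ, with at least one RXQ overall) and create a completion queue for
 * each, spreading the CQs round-robin across the event queues.
 */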
3120 static int be_rx_cqs_create(struct be_adapter *adapter)
3121 {
3122         struct be_queue_info *eq, *cq;
3123         struct be_rx_obj *rxo;
3124         int rc, i;
3125
3126         adapter->num_rss_qs =
3127                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3128
3129         /* We'll use RSS only if at least 2 RSS rings are supported. */
3130         if (adapter->num_rss_qs < 2)
3131                 adapter->num_rss_qs = 0;
3132
3133         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3134
3135         /* When the interface is not capable of RSS rings (and there is no
3136          * need to create a default RXQ) we'll still need one RXQ
3137          */
3138         if (adapter->num_rx_qs == 0)
3139                 adapter->num_rx_qs = 1;
3140
3141         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3142         for_all_rx_queues(adapter, rxo, i) {
3143                 rxo->adapter = adapter;
3144                 cq = &rxo->cq;
3145                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3146                                     sizeof(struct be_eth_rx_compl));
3147                 if (rc)
3148                         return rc;
3149
3150                 u64_stats_init(&rxo->stats.sync);
3151                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3152                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3153                 if (rc)
3154                         return rc;
3155         }
3156
3157         dev_info(&adapter->pdev->dev,
3158                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3159         return 0;
3160 }
3161
3162 static irqreturn_t be_intx(int irq, void *dev)
3163 {
3164         struct be_eq_obj *eqo = dev;
3165         struct be_adapter *adapter = eqo->adapter;
3166         int num_evts = 0;
3167
3168         /* IRQ is not expected when NAPI is scheduled as the EQ
3169          * will not be armed.
3170          * But, this can happen on Lancer INTx where it takes
3171          * a while to de-assert INTx or in BE2 where occasionally
3172          * an interrupt may be raised even when EQ is unarmed.
3173          * If NAPI is already scheduled, then counting & notifying
3174          * events will orphan them.
3175          */
3176         if (napi_schedule_prep(&eqo->napi)) {
3177                 num_evts = events_get(eqo);
3178                 __napi_schedule(&eqo->napi);
3179                 if (num_evts)
3180                         eqo->spurious_intr = 0;
3181         }
3182         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3183
3184         /* Return IRQ_HANDLED only for the first spurious intr
3185          * after a valid intr to stop the kernel from branding
3186          * this irq as a bad one!
3187          */
3188         if (num_evts || eqo->spurious_intr++ == 0)
3189                 return IRQ_HANDLED;
3190         else
3191                 return IRQ_NONE;
3192 }
3193
3194 static irqreturn_t be_msix(int irq, void *dev)
3195 {
3196         struct be_eq_obj *eqo = dev;
3197
3198         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3199         napi_schedule(&eqo->napi);
3200         return IRQ_HANDLED;
3201 }
3202
3203 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3204 {
3205         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3206 }
3207
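/* RX side of the NAPI poll: consume up to 'budget' completions, dropping
 * flush completions, zero-length completions and packets that arrived on
 * another port due to imperfect promiscuous filtering (non-Lancer), and
 * pass the rest up via GRO or the regular receive path. Re-arms the CQ
 * and replenishes RX fragments when the queue runs low.
 */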
3208 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3209                          int budget)
3210 {
3211         struct be_adapter *adapter = rxo->adapter;
3212         struct be_queue_info *rx_cq = &rxo->cq;
3213         struct be_rx_compl_info *rxcp;
3214         u32 work_done;
3215         u32 frags_consumed = 0;
3216
3217         for (work_done = 0; work_done < budget; work_done++) {
3218                 rxcp = be_rx_compl_get(rxo);
3219                 if (!rxcp)
3220                         break;
3221
3222                 /* Is it a flush compl that has no data */
3223                 if (unlikely(rxcp->num_rcvd == 0))
3224                         goto loop_continue;
3225
3226                 /* Discard compl with partial DMA on Lancer B0 */
3227                 if (unlikely(!rxcp->pkt_size)) {
3228                         be_rx_compl_discard(rxo, rxcp);
3229                         goto loop_continue;
3230                 }
3231
3232                 /* On BE drop pkts that arrive due to imperfect filtering in
3233                  * promiscuous mode on some SKUs
3234                  */
3235                 if (unlikely(rxcp->port != adapter->port_num &&
3236                              !lancer_chip(adapter))) {
3237                         be_rx_compl_discard(rxo, rxcp);
3238                         goto loop_continue;
3239                 }
3240
3241                 if (do_gro(rxcp))
3242                         be_rx_compl_process_gro(rxo, napi, rxcp);
3243                 else
3244                         be_rx_compl_process(rxo, napi, rxcp);
3245
3246 loop_continue:
3247                 frags_consumed += rxcp->num_rcvd;
3248                 be_rx_stats_update(rxo, rxcp);
3249         }
3250
3251         if (work_done) {
3252                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3253
3254                 /* When an rx-obj gets into post_starved state, just
3255                  * let be_worker do the posting.
3256                  */
3257                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3258                     !rxo->rx_post_starved)
3259                         be_post_rx_frags(rxo, GFP_ATOMIC,
3260                                          max_t(u32, MAX_RX_POST,
3261                                                frags_consumed));
3262         }
3263
3264         return work_done;
3265 }
3266
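/* TX side of the NAPI poll: process all pending TX completions on this
 * queue, free the associated WRBs/skbs, re-arm the CQ and wake the netdev
 * subqueue if it was stopped for lack of WRBs.
 */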
3268 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3269                           int idx)
3270 {
3271         int num_wrbs = 0, work_done = 0;
3272         struct be_tx_compl_info *txcp;
3273
3274         while ((txcp = be_tx_compl_get(adapter, txo))) {
3275                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3276                 work_done++;
3277         }
3278
3279         if (work_done) {
3280                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3281                 atomic_sub(num_wrbs, &txo->q.used);
3282
3283                 /* As Tx wrbs have been freed up, wake up netdev queue
3284                  * if it was stopped due to lack of tx wrbs.  */
3285                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3286                     be_can_txq_wake(txo)) {
3287                         netif_wake_subqueue(adapter->netdev, idx);
3288                 }
3289
3290                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3291                 tx_stats(txo)->tx_compl += work_done;
3292                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3293         }
3294 }
3295
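/* Main NAPI handler for an EQ: services TX completions for all TXQs and
 * RX completions for all RXQs mapped to this EQ, handles MCC completions
 * on the MCC EQ, and re-arms the EQ (with an optional Skyhawk delay
 * multiplier) only when the budget was not exhausted.
 */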
3296 int be_poll(struct napi_struct *napi, int budget)
3297 {
3298         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3299         struct be_adapter *adapter = eqo->adapter;
3300         int max_work = 0, work, i, num_evts;
3301         struct be_rx_obj *rxo;
3302         struct be_tx_obj *txo;
3303         u32 mult_enc = 0;
3304
3305         num_evts = events_get(eqo);
3306
3307         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3308                 be_process_tx(adapter, txo, i);
3309
3310         /* This loop will iterate twice for EQ0 in which
3311          * completions of the last RXQ (default one) are also processed
3312          * For other EQs the loop iterates only once
3313          */
3314         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3315                 work = be_process_rx(rxo, napi, budget);
3316                 max_work = max(work, max_work);
3317         }
3318
3319         if (is_mcc_eqo(eqo))
3320                 be_process_mcc(adapter);
3321
3322         if (max_work < budget) {
3323                 napi_complete_done(napi, max_work);
3324
3325                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3326                  * delay via a delay multiplier encoding value
3327                  */
3328                 if (skyhawk_chip(adapter))
3329                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3330
3331                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3332                              mult_enc);
3333         } else {
3334                 /* As we'll continue in polling mode, count and clear events */
3335                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3336         }
3337         return max_work;
3338 }
3339
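/* Poll the adapter's error registers: SLIPORT status on Lancer, the UE
 * status CSRs elsewhere. On BE3 the POST stage is consulted to filter out
 * spurious UE indications before the adapter is marked as failed.
 */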
3340 void be_detect_error(struct be_adapter *adapter)
3341 {
3342         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3343         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3344         struct device *dev = &adapter->pdev->dev;
3345         u16 val;
3346         u32 i;
3347
3348         if (be_check_error(adapter, BE_ERROR_HW))
3349                 return;
3350
3351         if (lancer_chip(adapter)) {
3352                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3353                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3354                         be_set_error(adapter, BE_ERROR_UE);
3355                         sliport_err1 = ioread32(adapter->db +
3356                                                 SLIPORT_ERROR1_OFFSET);
3357                         sliport_err2 = ioread32(adapter->db +
3358                                                 SLIPORT_ERROR2_OFFSET);
3359                         /* Do not log error messages if it's a FW reset */
3360                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3361                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3362                                 dev_info(dev, "Reset is in progress\n");
3363                         } else {
3364                                 dev_err(dev, "Error detected in the card\n");
3365                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3366                                         sliport_status);
3367                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3368                                         sliport_err1);
3369                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3370                                         sliport_err2);
3371                         }
3372                 }
3373         } else {
3374                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3375                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3376                 ue_lo_mask = ioread32(adapter->pcicfg +
3377                                       PCICFG_UE_STATUS_LOW_MASK);
3378                 ue_hi_mask = ioread32(adapter->pcicfg +
3379                                       PCICFG_UE_STATUS_HI_MASK);
3380
3381                 ue_lo = (ue_lo & ~ue_lo_mask);
3382                 ue_hi = (ue_hi & ~ue_hi_mask);
3383
3384                 if (ue_lo || ue_hi) {
3385                         /* On certain platforms BE3 hardware can indicate
3386                          * spurious UEs. In case of a UE in the chip,
3387                          * the POST register correctly reports either a
3388                          * FAT_LOG_START state (FW is currently dumping
3389                          * FAT log data) or an ARMFW_UE state. Check for the
3390                          * above states to ascertain if the UE is valid or not.
3391                          */
3392                         if (BE3_chip(adapter)) {
3393                                 val = be_POST_stage_get(adapter);
3394                                 if ((val & POST_STAGE_FAT_LOG_START)
3395                                      != POST_STAGE_FAT_LOG_START &&
3396                                     (val & POST_STAGE_ARMFW_UE)
3397                                      != POST_STAGE_ARMFW_UE &&
3398                                     (val & POST_STAGE_RECOVERABLE_ERR)
3399                                      != POST_STAGE_RECOVERABLE_ERR)
3400                                         return;
3401                         }
3402
3403                         dev_err(dev, "Error detected in the adapter");
3404                         be_set_error(adapter, BE_ERROR_UE);
3405
3406                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3407                                 if (ue_lo & 1)
3408                                         dev_err(dev, "UE: %s bit set\n",
3409                                                 ue_status_low_desc[i]);
3410                         }
3411                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3412                                 if (ue_hi & 1)
3413                                         dev_err(dev, "UE: %s bit set\n",
3414                                                 ue_status_hi_desc[i]);
3415                         }
3416                 }
3417         }
3418 }
3419
3420 static void be_msix_disable(struct be_adapter *adapter)
3421 {
3422         if (msix_enabled(adapter)) {
3423                 pci_disable_msix(adapter->pdev);
3424                 adapter->num_msix_vec = 0;
3425                 adapter->num_msix_roce_vec = 0;
3426         }
3427 }
3428
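/* Enable MSI-X: request enough vectors for the NIC (and for RoCE when
 * supported, in which case the granted vectors are split between them).
 * If MSI-X cannot be enabled, the failure is fatal only for VFs, which
 * have no INTx; PFs fall back to INTx in be_irq_register().
 */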
3429 static int be_msix_enable(struct be_adapter *adapter)
3430 {
3431         unsigned int i, max_roce_eqs;
3432         struct device *dev = &adapter->pdev->dev;
3433         int num_vec;
3434
3435         /* If RoCE is supported, program the max number of vectors that
3436          * could be used for NIC and RoCE, else, just program the number
3437          * we'll use initially.
3438          */
3439         if (be_roce_supported(adapter)) {
3440                 max_roce_eqs =
3441                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3442                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3443                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3444         } else {
3445                 num_vec = max(adapter->cfg_num_rx_irqs,
3446                               adapter->cfg_num_tx_irqs);
3447         }
3448
3449         for (i = 0; i < num_vec; i++)
3450                 adapter->msix_entries[i].entry = i;
3451
3452         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3453                                         MIN_MSIX_VECTORS, num_vec);
3454         if (num_vec < 0)
3455                 goto fail;
3456
3457         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3458                 adapter->num_msix_roce_vec = num_vec / 2;
3459                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3460                          adapter->num_msix_roce_vec);
3461         }
3462
3463         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3464
3465         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3466                  adapter->num_msix_vec);
3467         return 0;
3468
3469 fail:
3470         dev_warn(dev, "MSIx enable failed\n");
3471
3472         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3473         if (be_virtfn(adapter))
3474                 return num_vec;
3475         return 0;
3476 }
3477
3478 static inline int be_msix_vec_get(struct be_adapter *adapter,
3479                                   struct be_eq_obj *eqo)
3480 {
3481         return adapter->msix_entries[eqo->msix_idx].vector;
3482 }
3483
3484 static int be_msix_register(struct be_adapter *adapter)
3485 {
3486         struct net_device *netdev = adapter->netdev;
3487         struct be_eq_obj *eqo;
3488         int status, i, vec;
3489
3490         for_all_evt_queues(adapter, eqo, i) {
3491                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3492                 vec = be_msix_vec_get(adapter, eqo);
3493                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3494                 if (status)
3495                         goto err_msix;
3496
3497                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3498         }
3499
3500         return 0;
3501 err_msix:
3502         for (i--; i >= 0; i--) {
3503                 eqo = &adapter->eq_obj[i];
3504                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3505         }
3506         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3507                  status);
3508         be_msix_disable(adapter);
3509         return status;
3510 }
3511
3512 static int be_irq_register(struct be_adapter *adapter)
3513 {
3514         struct net_device *netdev = adapter->netdev;
3515         int status;
3516
3517         if (msix_enabled(adapter)) {
3518                 status = be_msix_register(adapter);
3519                 if (status == 0)
3520                         goto done;
3521                 /* INTx is not supported for VF */
3522                 if (be_virtfn(adapter))
3523                         return status;
3524         }
3525
3526         /* INTx: only the first EQ is used */
3527         netdev->irq = adapter->pdev->irq;
3528         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3529                              &adapter->eq_obj[0]);
3530         if (status) {
3531                 dev_err(&adapter->pdev->dev,
3532                         "INTx request IRQ failed - err %d\n", status);
3533                 return status;
3534         }
3535 done:
3536         adapter->isr_registered = true;
3537         return 0;
3538 }
3539
3540 static void be_irq_unregister(struct be_adapter *adapter)
3541 {
3542         struct net_device *netdev = adapter->netdev;
3543         struct be_eq_obj *eqo;
3544         int i, vec;
3545
3546         if (!adapter->isr_registered)
3547                 return;
3548
3549         /* INTx */
3550         if (!msix_enabled(adapter)) {
3551                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3552                 goto done;
3553         }
3554
3555         /* MSIx */
3556         for_all_evt_queues(adapter, eqo, i) {
3557                 vec = be_msix_vec_get(adapter, eqo);
3558                 irq_set_affinity_hint(vec, NULL);
3559                 free_irq(vec, eqo);
3560         }
3561
3562 done:
3563         adapter->isr_registered = false;
3564 }
3565
3566 static void be_rx_qs_destroy(struct be_adapter *adapter)
3567 {
3568         struct rss_info *rss = &adapter->rss_info;
3569         struct be_queue_info *q;
3570         struct be_rx_obj *rxo;
3571         int i;
3572
3573         for_all_rx_queues(adapter, rxo, i) {
3574                 q = &rxo->q;
3575                 if (q->created) {
3576                         /* If RXQs are destroyed while in an "out of buffer"
3577                          * state, there is a possibility of an HW stall on
3578                          * Lancer. So, post 64 buffers to each queue to relieve
3579                          * the "out of buffer" condition.
3580                          * Make sure there's space in the RXQ before posting.
3581                          */
3582                         if (lancer_chip(adapter)) {
3583                                 be_rx_cq_clean(rxo);
3584                                 if (atomic_read(&q->used) == 0)
3585                                         be_post_rx_frags(rxo, GFP_KERNEL,
3586                                                          MAX_RX_POST);
3587                         }
3588
3589                         be_cmd_rxq_destroy(adapter, q);
3590                         be_rx_cq_clean(rxo);
3591                         be_rxq_clean(rxo);
3592                 }
3593                 be_queue_free(adapter, q);
3594         }
3595
3596         if (rss->rss_flags) {
3597                 rss->rss_flags = RSS_ENABLE_NONE;
3598                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3599                                   128, rss->rss_hkey);
3600         }
3601 }
3602
3603 static void be_disable_if_filters(struct be_adapter *adapter)
3604 {
3605         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3606         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3607             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3608                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3609                 eth_zero_addr(adapter->dev_mac);
3610         }
3611
3612         be_clear_uc_list(adapter);
3613         be_clear_mc_list(adapter);
3614
3615         /* The IFACE flags are enabled in the open path and cleared
3616          * in the close path. When a VF gets detached from the host and
3617          * assigned to a VM the following happens:
3618          *      - VF's IFACE flags get cleared in the detach path
3619          *      - IFACE create is issued by the VF in the attach path
3620          * Due to a bug in the BE3/Skyhawk-R FW
3621          * (Lancer FW doesn't have the bug), the IFACE capability flags
3622          * specified along with the IFACE create cmd issued by a VF are not
3623          * honoured by FW.  As a consequence, if a *new* driver
3624          * (that enables/disables IFACE flags in open/close)
3625          * is loaded in the host and an *old* driver is used by a VM/VF,
3626          * the IFACE gets created *without* the needed flags.
3627          * To avoid this, disable RX-filter flags only for Lancer.
3628          */
3629         if (lancer_chip(adapter)) {
3630                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3631                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3632         }
3633 }
3634
3635 static int be_close(struct net_device *netdev)
3636 {
3637         struct be_adapter *adapter = netdev_priv(netdev);
3638         struct be_eq_obj *eqo;
3639         int i;
3640
3641         /* This protection is needed as be_close() may be called even when the
3642          * adapter is in cleared state (after eeh perm failure)
3643          */
3644         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3645                 return 0;
3646
3647         /* Before attempting cleanup ensure all the pending cmds in the
3648          * config_wq have finished execution
3649          */
3650         flush_workqueue(be_wq);
3651
3652         be_disable_if_filters(adapter);
3653
3654         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3655                 for_all_evt_queues(adapter, eqo, i) {
3656                         napi_disable(&eqo->napi);
3657                 }
3658                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3659         }
3660
3661         be_async_mcc_disable(adapter);
3662
3663         /* Wait for all pending tx completions to arrive so that
3664          * all tx skbs are freed.
3665          */
3666         netif_tx_disable(netdev);
3667         be_tx_compl_clean(adapter);
3668
3669         be_rx_qs_destroy(adapter);
3670
3671         for_all_evt_queues(adapter, eqo, i) {
3672                 if (msix_enabled(adapter))
3673                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3674                 else
3675                         synchronize_irq(netdev->irq);
3676                 be_eq_clean(eqo);
3677         }
3678
3679         be_irq_unregister(adapter);
3680
3681         return 0;
3682 }
3683
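/* Allocate the RXQ rings, create the default and RSS receive queues,
 * program the RSS indirection table and hash key when more than one RXQ
 * exists, and post the initial receive buffers (one less than the ring
 * size to avoid the head-equals-tail empty condition).
 */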
3684 static int be_rx_qs_create(struct be_adapter *adapter)
3685 {
3686         struct rss_info *rss = &adapter->rss_info;
3687         u8 rss_key[RSS_HASH_KEY_LEN];
3688         struct be_rx_obj *rxo;
3689         int rc, i, j;
3690
3691         for_all_rx_queues(adapter, rxo, i) {
3692                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3693                                     sizeof(struct be_eth_rx_d));
3694                 if (rc)
3695                         return rc;
3696         }
3697
3698         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3699                 rxo = default_rxo(adapter);
3700                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3701                                        rx_frag_size, adapter->if_handle,
3702                                        false, &rxo->rss_id);
3703                 if (rc)
3704                         return rc;
3705         }
3706
3707         for_all_rss_queues(adapter, rxo, i) {
3708                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3709                                        rx_frag_size, adapter->if_handle,
3710                                        true, &rxo->rss_id);
3711                 if (rc)
3712                         return rc;
3713         }
3714
3715         if (be_multi_rxq(adapter)) {
3716                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3717                         for_all_rss_queues(adapter, rxo, i) {
3718                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3719                                         break;
3720                                 rss->rsstable[j + i] = rxo->rss_id;
3721                                 rss->rss_queue[j + i] = i;
3722                         }
3723                 }
3724                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3725                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3726
3727                 if (!BEx_chip(adapter))
3728                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3729                                 RSS_ENABLE_UDP_IPV6;
3730
3731                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3732                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3733                                        RSS_INDIR_TABLE_LEN, rss_key);
3734                 if (rc) {
3735                         rss->rss_flags = RSS_ENABLE_NONE;
3736                         return rc;
3737                 }
3738
3739                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3740         } else {
3741                 /* Disable RSS, if only default RX Q is created */
3742                 rss->rss_flags = RSS_ENABLE_NONE;
3743         }
3744
3746         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3747          * which is a queue empty condition
3748          */
3749         for_all_rx_queues(adapter, rxo, i)
3750                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3751
3752         return 0;
3753 }
3754
3755 static int be_enable_if_filters(struct be_adapter *adapter)
3756 {
3757         int status;
3758
3759         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3760         if (status)
3761                 return status;
3762
3763         /* Normally this condition is true as the ->dev_mac is zeroed.
3764          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3765          * subsequent be_dev_mac_add() can fail (after fresh boot)
3766          */
3767         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3768                 int old_pmac_id = -1;
3769
3770                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3771                 if (!is_zero_ether_addr(adapter->dev_mac))
3772                         old_pmac_id = adapter->pmac_id[0];
3773
3774                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3775                 if (status)
3776                         return status;
3777
3778                 /* Delete the old programmed MAC as we successfully programmed
3779                  * a new MAC
3780                  */
3781                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3782                         be_dev_mac_del(adapter, old_pmac_id);
3783
3784                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3785         }
3786
3787         if (adapter->vlans_added)
3788                 be_vid_config(adapter);
3789
3790         __be_set_rx_mode(adapter);
3791
3792         return 0;
3793 }
3794
3795 static int be_open(struct net_device *netdev)
3796 {
3797         struct be_adapter *adapter = netdev_priv(netdev);
3798         struct be_eq_obj *eqo;
3799         struct be_rx_obj *rxo;
3800         struct be_tx_obj *txo;
3801         u8 link_status;
3802         int status, i;
3803
3804         status = be_rx_qs_create(adapter);
3805         if (status)
3806                 goto err;
3807
3808         status = be_enable_if_filters(adapter);
3809         if (status)
3810                 goto err;
3811
3812         status = be_irq_register(adapter);
3813         if (status)
3814                 goto err;
3815
3816         for_all_rx_queues(adapter, rxo, i)
3817                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3818
3819         for_all_tx_queues(adapter, txo, i)
3820                 be_cq_notify(adapter, txo->cq.id, true, 0);
3821
3822         be_async_mcc_enable(adapter);
3823
3824         for_all_evt_queues(adapter, eqo, i) {
3825                 napi_enable(&eqo->napi);
3826                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3827         }
3828         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3829
3830         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3831         if (!status)
3832                 be_link_status_update(adapter, link_status);
3833
3834         netif_tx_start_all_queues(netdev);
3835         if (skyhawk_chip(adapter))
3836                 udp_tunnel_get_rx_info(netdev);
3837
3838         return 0;
3839 err:
3840         be_close(adapter->netdev);
3841         return -EIO;
3842 }
3843
3844 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3845 {
3846         u32 addr;
3847
3848         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3849
3850         mac[5] = (u8)(addr & 0xFF);
3851         mac[4] = (u8)((addr >> 8) & 0xFF);
3852         mac[3] = (u8)((addr >> 16) & 0xFF);
3853         /* Use the OUI from the current MAC address */
3854         memcpy(mac, adapter->netdev->dev_addr, 3);
3855 }
3856
3857 /*
3858  * Generate a seed MAC address from the PF MAC Address using jhash.
3859  * MAC Address for VFs are assigned incrementally starting from the seed.
3860  * These addresses are programmed in the ASIC by the PF and the VF driver
3861  * queries for the MAC address during its probe.
3862  */
3863 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3864 {
3865         u32 vf;
3866         int status = 0;
3867         u8 mac[ETH_ALEN];
3868         struct be_vf_cfg *vf_cfg;
3869
3870         be_vf_eth_addr_generate(adapter, mac);
3871
3872         for_all_vfs(adapter, vf_cfg, vf) {
3873                 if (BEx_chip(adapter))
3874                         status = be_cmd_pmac_add(adapter, mac,
3875                                                  vf_cfg->if_handle,
3876                                                  &vf_cfg->pmac_id, vf + 1);
3877                 else
3878                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3879                                                 vf + 1);
3880
3881                 if (status)
3882                         dev_err(&adapter->pdev->dev,
3883                                 "Mac address assignment failed for VF %d\n",
3884                                 vf);
3885                 else
3886                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3887
3888                 mac[5] += 1;
3889         }
3890         return status;
3891 }
3892
3893 static int be_vfs_mac_query(struct be_adapter *adapter)
3894 {
3895         int status, vf;
3896         u8 mac[ETH_ALEN];
3897         struct be_vf_cfg *vf_cfg;
3898
3899         for_all_vfs(adapter, vf_cfg, vf) {
3900                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3901                                                mac, vf_cfg->if_handle,
3902                                                false, vf+1);
3903                 if (status)
3904                         return status;
3905                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3906         }
3907         return 0;
3908 }
3909
3910 static void be_vf_clear(struct be_adapter *adapter)
3911 {
3912         struct be_vf_cfg *vf_cfg;
3913         u32 vf;
3914
3915         if (pci_vfs_assigned(adapter->pdev)) {
3916                 dev_warn(&adapter->pdev->dev,
3917                          "VFs are assigned to VMs: not disabling VFs\n");
3918                 goto done;
3919         }
3920
3921         pci_disable_sriov(adapter->pdev);
3922
3923         for_all_vfs(adapter, vf_cfg, vf) {
3924                 if (BEx_chip(adapter))
3925                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3926                                         vf_cfg->pmac_id, vf + 1);
3927                 else
3928                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3929                                        vf + 1);
3930
3931                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3932         }
3933
3934         if (BE3_chip(adapter))
3935                 be_cmd_set_hsw_config(adapter, 0, 0,
3936                                       adapter->if_handle,
3937                                       PORT_FWD_TYPE_PASSTHRU, 0);
3938 done:
3939         kfree(adapter->vf_cfg);
3940         adapter->num_vfs = 0;
3941         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3942 }
3943
3944 static void be_clear_queues(struct be_adapter *adapter)
3945 {
3946         be_mcc_queues_destroy(adapter);
3947         be_rx_cqs_destroy(adapter);
3948         be_tx_queues_destroy(adapter);
3949         be_evt_queues_destroy(adapter);
3950 }
3951
3952 static void be_cancel_worker(struct be_adapter *adapter)
3953 {
3954         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3955                 cancel_delayed_work_sync(&adapter->work);
3956                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3957         }
3958 }
3959
3960 static void be_cancel_err_detection(struct be_adapter *adapter)
3961 {
3962         struct be_error_recovery *err_rec = &adapter->error_recovery;
3963
3964         if (!be_err_recovery_workq)
3965                 return;
3966
3967         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3968                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3969                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3970         }
3971 }
3972
3973 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3974 {
3975         struct net_device *netdev = adapter->netdev;
3976         struct device *dev = &adapter->pdev->dev;
3977         struct be_vxlan_port *vxlan_port;
3978         __be16 port;
3979         int status;
3980
3981         vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3982                                       struct be_vxlan_port, list);
3983         port = vxlan_port->port;
3984
3985         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3986                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3987         if (status) {
3988                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3989                 return status;
3990         }
3991         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3992
3993         status = be_cmd_set_vxlan_port(adapter, port);
3994         if (status) {
3995                 dev_warn(dev, "Failed to add VxLAN port\n");
3996                 return status;
3997         }
3998         adapter->vxlan_port = port;
3999
4000         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4001                                    NETIF_F_TSO | NETIF_F_TSO6 |
4002                                    NETIF_F_GSO_UDP_TUNNEL;
4003
4004         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4005                  be16_to_cpu(port));
4006         return 0;
4007 }
4008
4009 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4010 {
4011         struct net_device *netdev = adapter->netdev;
4012
4013         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4014                 be_cmd_manage_iface(adapter, adapter->if_handle,
4015                                     OP_CONVERT_TUNNEL_TO_NORMAL);
4016
4017         if (adapter->vxlan_port)
4018                 be_cmd_set_vxlan_port(adapter, 0);
4019
4020         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4021         adapter->vxlan_port = 0;
4022
4023         netdev->hw_enc_features = 0;
4024 }
4025
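/* Compute the per-VF slice of the function's resource pool: RX/RSS/TX
 * queues, CQs, unicast MACs, VLANs, interfaces and MCC queues are divided
 * among the PF and the requested VFs, and the VF IFACE capability flags
 * are trimmed according to what the FW reports as modifiable.
 */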
4026 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4027                                 struct be_resources *vft_res)
4028 {
4029         struct be_resources res = adapter->pool_res;
4030         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4031         struct be_resources res_mod = {0};
4032         u16 num_vf_qs = 1;
4033
4034         /* Distribute the queue resources among the PF and its VFs */
4035         if (num_vfs) {
4036                 /* Divide the rx queues evenly among the VFs and the PF, capped
4037                  * at VF-EQ-count. Any remainder queues belong to the PF.
4038                  */
4039                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4040                                 res.max_rss_qs / (num_vfs + 1));
4041
4042                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4043                  * RSS Tables per port. Provide RSS on VFs, only if number of
4044                  * VFs requested is less than it's PF Pool's RSS Tables limit.
4045                  * VFs requested is less than its PF Pool's RSS Tables limit.
4046                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4047                         num_vf_qs = 1;
4048         }
4049
4050         /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4051          * which are modifiable using SET_PROFILE_CONFIG cmd.
4052          */
4053         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4054                                   RESOURCE_MODIFIABLE, 0);
4055
4056         /* If RSS IFACE capability flags are modifiable for a VF, set the
4057          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4058          * more than 1 RSSQ is available for a VF.
4059          * Otherwise, provision only 1 queue pair for VF.
4060          */
4061         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4062                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4063                 if (num_vf_qs > 1) {
4064                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4065                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4066                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4067                 } else {
4068                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4069                                              BE_IF_FLAGS_DEFQ_RSS);
4070                 }
4071         } else {
4072                 num_vf_qs = 1;
4073         }
4074
4075         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4076                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4077                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4078         }
4079
4080         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4081         vft_res->max_rx_qs = num_vf_qs;
4082         vft_res->max_rss_qs = num_vf_qs;
4083         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4084         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4085
4086         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4087          * among the PF and its VFs, if the fields are changeable
4088          */
4089         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4090                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4091
4092         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4093                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4094
4095         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4096                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4097
4098         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4099                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4100 }
4101
4102 static void be_if_destroy(struct be_adapter *adapter)
4103 {
4104         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4105
4106         kfree(adapter->pmac_id);
4107         adapter->pmac_id = NULL;
4108
4109         kfree(adapter->mc_list);
4110         adapter->mc_list = NULL;
4111
4112         kfree(adapter->uc_list);
4113         adapter->uc_list = NULL;
4114 }
4115
4116 static int be_clear(struct be_adapter *adapter)
4117 {
4118         struct pci_dev *pdev = adapter->pdev;
4119         struct  be_resources vft_res = {0};
4120
4121         be_cancel_worker(adapter);
4122
4123         flush_workqueue(be_wq);
4124
4125         if (sriov_enabled(adapter))
4126                 be_vf_clear(adapter);
4127
4128         /* Re-configure FW to distribute resources evenly across max-supported
4129          * number of VFs, only when VFs are not already enabled.
4130          */
4131         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4132             !pci_vfs_assigned(pdev)) {
4133                 be_calculate_vf_res(adapter,
4134                                     pci_sriov_get_totalvfs(pdev),
4135                                     &vft_res);
4136                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4137                                         pci_sriov_get_totalvfs(pdev),
4138                                         &vft_res);
4139         }
4140
4141         be_disable_vxlan_offloads(adapter);
4142
4143         be_if_destroy(adapter);
4144
4145         be_clear_queues(adapter);
4146
4147         be_msix_disable(adapter);
4148         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4149         return 0;
4150 }
4151
4152 static int be_vfs_if_create(struct be_adapter *adapter)
4153 {
4154         struct be_resources res = {0};
4155         u32 cap_flags, en_flags, vf;
4156         struct be_vf_cfg *vf_cfg;
4157         int status;
4158
4159         /* If a FW profile exists, then cap_flags are updated */
4160         cap_flags = BE_VF_IF_EN_FLAGS;
4161
4162         for_all_vfs(adapter, vf_cfg, vf) {
4163                 if (!BE3_chip(adapter)) {
4164                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4165                                                            ACTIVE_PROFILE_TYPE,
4166                                                            RESOURCE_LIMITS,
4167                                                            vf + 1);
4168                         if (!status) {
4169                                 cap_flags = res.if_cap_flags;
4170                                 /* Prevent VFs from enabling VLAN promiscuous
4171                                  * mode
4172                                  */
4173                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4174                         }
4175                 }
4176
4177                 /* PF should enable IF flags during proxy if_create call */
4178                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4179                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4180                                           &vf_cfg->if_handle, vf + 1);
4181                 if (status)
4182                         return status;
4183         }
4184
4185         return 0;
4186 }
4187
4188 static int be_vf_setup_init(struct be_adapter *adapter)
4189 {
4190         struct be_vf_cfg *vf_cfg;
4191         int vf;
4192
4193         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4194                                   GFP_KERNEL);
4195         if (!adapter->vf_cfg)
4196                 return -ENOMEM;
4197
4198         for_all_vfs(adapter, vf_cfg, vf) {
4199                 vf_cfg->if_handle = -1;
4200                 vf_cfg->pmac_id = -1;
4201         }
4202         return 0;
4203 }
4204
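/* Bring up SR-IOV VFs: reuse existing per-VF interfaces and MACs when VFs
 * are already enabled, otherwise create and program them; then grant
 * FILTMGMT privilege where possible, allow full bandwidth, fetch the
 * spoof-check setting, enable the VFs and SR-IOV, and on BE3 switch the
 * port to VEB forwarding.
 */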
4205 static int be_vf_setup(struct be_adapter *adapter)
4206 {
4207         struct device *dev = &adapter->pdev->dev;
4208         struct be_vf_cfg *vf_cfg;
4209         int status, old_vfs, vf;
4210         bool spoofchk;
4211
4212         old_vfs = pci_num_vf(adapter->pdev);
4213
4214         status = be_vf_setup_init(adapter);
4215         if (status)
4216                 goto err;
4217
4218         if (old_vfs) {
4219                 for_all_vfs(adapter, vf_cfg, vf) {
4220                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4221                         if (status)
4222                                 goto err;
4223                 }
4224
4225                 status = be_vfs_mac_query(adapter);
4226                 if (status)
4227                         goto err;
4228         } else {
4229                 status = be_vfs_if_create(adapter);
4230                 if (status)
4231                         goto err;
4232
4233                 status = be_vf_eth_addr_config(adapter);
4234                 if (status)
4235                         goto err;
4236         }
4237
4238         for_all_vfs(adapter, vf_cfg, vf) {
4239                 /* Allow VFs to program MAC/VLAN filters */
4240                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4241                                                   vf + 1);
4242                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4243                         status = be_cmd_set_fn_privileges(adapter,
4244                                                           vf_cfg->privileges |
4245                                                           BE_PRIV_FILTMGMT,
4246                                                           vf + 1);
4247                         if (!status) {
4248                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4249                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4250                                          vf);
4251                         }
4252                 }
4253
4254                 /* Allow full available bandwidth */
4255                 if (!old_vfs)
4256                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4257
4258                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4259                                                vf_cfg->if_handle, NULL,
4260                                                &spoofchk);
4261                 if (!status)
4262                         vf_cfg->spoofchk = spoofchk;
4263
4264                 if (!old_vfs) {
4265                         be_cmd_enable_vf(adapter, vf + 1);
4266                         be_cmd_set_logical_link_config(adapter,
4267                                                        IFLA_VF_LINK_STATE_AUTO,
4268                                                        vf + 1);
4269                 }
4270         }
4271
4272         if (!old_vfs) {
4273                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4274                 if (status) {
4275                         dev_err(dev, "SRIOV enable failed\n");
4276                         adapter->num_vfs = 0;
4277                         goto err;
4278                 }
4279         }
4280
4281         if (BE3_chip(adapter)) {
4282                 /* On BE3, enable VEB only when SRIOV is enabled */
4283                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4284                                                adapter->if_handle,
4285                                                PORT_FWD_TYPE_VEB, 0);
4286                 if (status)
4287                         goto err;
4288         }
4289
4290         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4291         return 0;
4292 err:
4293         dev_err(dev, "VF setup failed\n");
4294         be_vf_clear(adapter);
4295         return status;
4296 }
4297
4298 /* Converting function_mode bits on BE3 to SH mc_type enums */
4299
4300 static u8 be_convert_mc_type(u32 function_mode)
4301 {
4302         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4303                 return vNIC1;
4304         else if (function_mode & QNQ_MODE)
4305                 return FLEX10;
4306         else if (function_mode & VNIC_MODE)
4307                 return vNIC2;
4308         else if (function_mode & UMC_ENABLED)
4309                 return UMC;
4310         else
4311                 return MC_NONE;
4312 }
4313
4314 /* On BE2/BE3 FW does not suggest the supported limits */
4315 static void BEx_get_resources(struct be_adapter *adapter,
4316                               struct be_resources *res)
4317 {
4318         bool use_sriov = adapter->num_vfs ? 1 : 0;
4319
4320         if (be_physfn(adapter))
4321                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4322         else
4323                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4324
4325         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4326
4327         if (be_is_mc(adapter)) {
4328                 /* Assuming that there are 4 channels per port,
4329                  * when multi-channel is enabled
4330                  */
4331                 if (be_is_qnq_mode(adapter))
4332                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4333                 else
4334                         /* In a non-qnq multichannel mode, the pvid
4335                          * takes up one vlan entry
4336                          */
4337                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4338         } else {
4339                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4340         }
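        /* Illustrative, with an assumed BE_NUM_VLANS_SUPPORTED of 64: a QnQ
         * function would get 64 / 8 = 8 VLAN filters, a non-QnQ multi-channel
         * function 64 / 4 - 1 = 15, and all other functions the full 64.
         */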
4341
4342         res->max_mcast_mac = BE_MAX_MC;
4343
4344         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4345          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4346          *    *only* if it is RSS-capable.
4347          */
4348         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4349             be_virtfn(adapter) ||
4350             (be_is_mc(adapter) &&
4351              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4352                 res->max_tx_qs = 1;
4353         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4354                 struct be_resources super_nic_res = {0};
4355
4356                 /* On a SuperNIC profile, the driver needs to use the
4357                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4358                  */
4359                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4360                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4361                                           0);
4362                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4363                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4364         } else {
4365                 res->max_tx_qs = BE3_MAX_TX_QS;
4366         }
4367
4368         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4369             !use_sriov && be_physfn(adapter))
4370                 res->max_rss_qs = (adapter->be3_native) ?
4371                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4372         res->max_rx_qs = res->max_rss_qs + 1;
4373
4374         if (be_physfn(adapter))
4375                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4376                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4377         else
4378                 res->max_evt_qs = 1;
4379
4380         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4381         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4382         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4383                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4384 }
4385
4386 static void be_setup_init(struct be_adapter *adapter)
4387 {
4388         adapter->vlan_prio_bmap = 0xff;
4389         adapter->phy.link_speed = -1;
4390         adapter->if_handle = -1;
4391         adapter->be3_native = false;
4392         adapter->if_flags = 0;
4393         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4394         if (be_physfn(adapter))
4395                 adapter->cmd_privileges = MAX_PRIVILEGES;
4396         else
4397                 adapter->cmd_privileges = MIN_PRIVILEGES;
4398 }
4399
4400 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4401  * However, this HW limitation is not exposed to the host via any SLI cmd.
4402  * As a result, in the case of SRIOV and in particular multi-partition configs
4403  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4404  * for distribution among the VFs. This self-imposed limit determines the
4405  * number of VFs for which RSS can be enabled.
4406  */
4407 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4408 {
4409         struct be_port_resources port_res = {0};
4410         u8 rss_tables_on_port;
4411         u16 max_vfs = be_max_vfs(adapter);
4412
4413         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4414                                   RESOURCE_LIMITS, 0);
4415
4416         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4417
4418         /* Each PF Pool's RSS Tables limit =
4419          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4420          */
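        /* Worked example with hypothetical numbers: if this PF supports 32 of
         * the port's 64 VFs and 14 RSS Tables remain after the NIC PFs take
         * theirs, this PF pool is limited to 32 * 14 / 64 = 7 RSS Tables.
         */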
4421         adapter->pool_res.max_rss_tables =
4422                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4423 }
4424
4425 static int be_get_sriov_config(struct be_adapter *adapter)
4426 {
4427         struct be_resources res = {0};
4428         int max_vfs, old_vfs;
4429
4430         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4431                                   RESOURCE_LIMITS, 0);
4432
4433         /* Some old versions of BE3 FW don't report max_vfs value */
4434         if (BE3_chip(adapter) && !res.max_vfs) {
4435                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4436                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4437         }
4438
4439         adapter->pool_res = res;
4440
4441         /* If during previous unload of the driver, the VFs were not disabled,
4442          * then we cannot rely on the PF POOL limits for the TotalVFs value.
4443          * Instead use the TotalVFs value stored in the pci-dev struct.
4444          */
4445         old_vfs = pci_num_vf(adapter->pdev);
4446         if (old_vfs) {
4447                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4448                          old_vfs);
4449
4450                 adapter->pool_res.max_vfs =
4451                         pci_sriov_get_totalvfs(adapter->pdev);
4452                 adapter->num_vfs = old_vfs;
4453         }
4454
4455         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4456                 be_calculate_pf_pool_rss_tables(adapter);
4457                 dev_info(&adapter->pdev->dev,
4458                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4459                          be_max_pf_pool_rss_tables(adapter));
4460         }
4461         return 0;
4462 }
4463
4464 static void be_alloc_sriov_res(struct be_adapter *adapter)
4465 {
4466         int old_vfs = pci_num_vf(adapter->pdev);
4467         struct  be_resources vft_res = {0};
4468         int status;
4469
4470         be_get_sriov_config(adapter);
4471
4472         if (!old_vfs)
4473                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4474
4475         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4476          * resources are given to the PF during driver load, if there are no
4477          * old VFs. This facility is not available in BE3 FW.
4478          * On the Lancer chip, this is done by the FW itself.
4479          */
4480         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4481                 be_calculate_vf_res(adapter, 0, &vft_res);
4482                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4483                                                  &vft_res);
4484                 if (status)
4485                         dev_err(&adapter->pdev->dev,
4486                                 "Failed to optimize SRIOV resources\n");
4487         }
4488 }
4489
4490 static int be_get_resources(struct be_adapter *adapter)
4491 {
4492         struct device *dev = &adapter->pdev->dev;
4493         struct be_resources res = {0};
4494         int status;
4495
4496         /* For Lancer, SH, etc., read per-function resource limits from FW.
4497          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4498          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits.
4499          */
4500         if (BEx_chip(adapter)) {
4501                 BEx_get_resources(adapter, &res);
4502         } else {
4503                 status = be_cmd_get_func_config(adapter, &res);
4504                 if (status)
4505                         return status;
4506
4507                 /* If a default RXQ must be created, we'll use up one RSSQ */
4508                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4509                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4510                         res.max_rss_qs -= 1;
4511         }
4512
4513         /* If RoCE is supported stash away half the EQs for RoCE */
4514         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4515                                 res.max_evt_qs / 2 : res.max_evt_qs;
4516         adapter->res = res;
4517
4518         /* If FW supports RSS default queue, then skip creating non-RSS
4519          * queue for non-IP traffic.
4520          */
4521         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4522                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4523
4524         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4525                  be_max_txqs(adapter), be_max_rxqs(adapter),
4526                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4527                  be_max_vfs(adapter));
4528         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4529                  be_max_uc(adapter), be_max_mc(adapter),
4530                  be_max_vlans(adapter));
4531
4532         /* Ensure RX and TX queues are created in pairs at init time */
4533         adapter->cfg_num_rx_irqs =
4534                                 min_t(u16, netif_get_num_default_rss_queues(),
4535                                       be_max_qp_irqs(adapter));
4536         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4537         return 0;
4538 }
4539
4540 static int be_get_config(struct be_adapter *adapter)
4541 {
4542         int status, level;
4543         u16 profile_id;
4544
4545         status = be_cmd_get_cntl_attributes(adapter);
4546         if (status)
4547                 return status;
4548
4549         status = be_cmd_query_fw_cfg(adapter);
4550         if (status)
4551                 return status;
4552
4553         if (!lancer_chip(adapter) && be_physfn(adapter))
4554                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4555
4556         if (BEx_chip(adapter)) {
4557                 level = be_cmd_get_fw_log_level(adapter);
4558                 adapter->msg_enable =
4559                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4560         }
4561
4562         be_cmd_get_acpi_wol_cap(adapter);
4563         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4564         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4565
4566         be_cmd_query_port_name(adapter);
4567
4568         if (be_physfn(adapter)) {
4569                 status = be_cmd_get_active_profile(adapter, &profile_id);
4570                 if (!status)
4571                         dev_info(&adapter->pdev->dev,
4572                                  "Using profile 0x%x\n", profile_id);
4573         }
4574
4575         return 0;
4576 }
4577
4578 static int be_mac_setup(struct be_adapter *adapter)
4579 {
4580         u8 mac[ETH_ALEN];
4581         int status;
4582
4583         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4584                 status = be_cmd_get_perm_mac(adapter, mac);
4585                 if (status)
4586                         return status;
4587
4588                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4589                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4590
4591                 /* Initial MAC for BE3 VFs is already programmed by PF */
4592                 if (BEx_chip(adapter) && be_virtfn(adapter))
4593                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4594         }
4595
4596         return 0;
4597 }
4598
4599 static void be_schedule_worker(struct be_adapter *adapter)
4600 {
4601         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4602         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4603 }
4604
4605 static void be_destroy_err_recovery_workq(void)
4606 {
4607         if (!be_err_recovery_workq)
4608                 return;
4609
4610         flush_workqueue(be_err_recovery_workq);
4611         destroy_workqueue(be_err_recovery_workq);
4612         be_err_recovery_workq = NULL;
4613 }
4614
4615 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4616 {
4617         struct be_error_recovery *err_rec = &adapter->error_recovery;
4618
4619         if (!be_err_recovery_workq)
4620                 return;
4621
4622         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4623                            msecs_to_jiffies(delay));
4624         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4625 }
4626
4627 static int be_setup_queues(struct be_adapter *adapter)
4628 {
4629         struct net_device *netdev = adapter->netdev;
4630         int status;
4631
4632         status = be_evt_queues_create(adapter);
4633         if (status)
4634                 goto err;
4635
4636         status = be_tx_qs_create(adapter);
4637         if (status)
4638                 goto err;
4639
4640         status = be_rx_cqs_create(adapter);
4641         if (status)
4642                 goto err;
4643
4644         status = be_mcc_queues_create(adapter);
4645         if (status)
4646                 goto err;
4647
4648         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4649         if (status)
4650                 goto err;
4651
4652         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4653         if (status)
4654                 goto err;
4655
4656         return 0;
4657 err:
4658         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4659         return status;
4660 }
4661
4662 static int be_if_create(struct be_adapter *adapter)
4663 {
4664         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4665         u32 cap_flags = be_if_cap_flags(adapter);
4666         int status;
4667
4668         /* alloc required memory for other filtering fields */
4669         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4670                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4671         if (!adapter->pmac_id)
4672                 return -ENOMEM;
4673
4674         adapter->mc_list = kcalloc(be_max_mc(adapter),
4675                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4676         if (!adapter->mc_list)
4677                 return -ENOMEM;
4678
4679         adapter->uc_list = kcalloc(be_max_uc(adapter),
4680                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4681         if (!adapter->uc_list)
4682                 return -ENOMEM;
4683
4684         if (adapter->cfg_num_rx_irqs == 1)
4685                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4686
4687         en_flags &= cap_flags;
4688         /* will enable all the needed filter flags in be_open() */
4689         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4690                                   &adapter->if_handle, 0);
4691
4692         if (status)
4693                 return status;
4694
4695         return 0;
4696 }
4697
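/* Re-create the interface and all queues with the currently configured
 * channel counts: close the netdev if running, destroy the queues and the
 * IF, re-enable MSI-X if needed, then re-create everything and re-open.
 */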
4698 int be_update_queues(struct be_adapter *adapter)
4699 {
4700         struct net_device *netdev = adapter->netdev;
4701         int status;
4702
4703         if (netif_running(netdev))
4704                 be_close(netdev);
4705
4706         be_cancel_worker(adapter);
4707
4708         /* If any vectors have been shared with RoCE, we cannot re-program
4709          * the MSIx table.
4710          */
4711         if (!adapter->num_msix_roce_vec)
4712                 be_msix_disable(adapter);
4713
4714         be_clear_queues(adapter);
4715         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4716         if (status)
4717                 return status;
4718
4719         if (!msix_enabled(adapter)) {
4720                 status = be_msix_enable(adapter);
4721                 if (status)
4722                         return status;
4723         }
4724
4725         status = be_if_create(adapter);
4726         if (status)
4727                 return status;
4728
4729         status = be_setup_queues(adapter);
4730         if (status)
4731                 return status;
4732
4733         be_schedule_worker(adapter);
4734
4735         /* The IF was destroyed and re-created. We need to clear
4736          * all promiscuous flags valid for the destroyed IF.
4737          * Without this, promisc mode is not restored during
4738          * be_open() because the driver thinks that it is
4739          * already enabled in HW.
4740          */
4741         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4742
4743         if (netif_running(netdev))
4744                 status = be_open(netdev);
4745
4746         return status;
4747 }
4748
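/* Parse the major number from a FW version string (e.g. "4.0" -> 4).
 * Returns 0 if the string does not begin with a decimal number.
 */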
4749 static inline int fw_major_num(const char *fw_ver)
4750 {
4751         int fw_major = 0, i;
4752
4753         i = sscanf(fw_ver, "%d.", &fw_major);
4754         if (i != 1)
4755                 return 0;
4756
4757         return fw_major;
4758 }
4759
4760 /* During error recovery, always FLR the PF.
4761  * Otherwise, FLR the PF only if no VFs are already enabled.
4762  */
4763 static bool be_reset_required(struct be_adapter *adapter)
4764 {
4765         if (be_error_recovering(adapter))
4766                 return true;
4767         else
4768                 return pci_num_vf(adapter->pdev) == 0;
4769 }
4770
4771 /* Wait for the FW to be ready and perform the required initialization */
4772 static int be_func_init(struct be_adapter *adapter)
4773 {
4774         int status;
4775
4776         status = be_fw_wait_ready(adapter);
4777         if (status)
4778                 return status;
4779
4780         /* FW is now ready; clear errors to allow cmds/doorbell */
4781         be_clear_error(adapter, BE_CLEAR_ALL);
4782
4783         if (be_reset_required(adapter)) {
4784                 status = be_cmd_reset_function(adapter);
4785                 if (status)
4786                         return status;
4787
4788                 /* Wait for interrupts to quiesce after an FLR */
4789                 msleep(100);
4790         }
4791
4792         /* Tell FW we're ready to fire cmds */
4793         status = be_cmd_fw_init(adapter);
4794         if (status)
4795                 return status;
4796
4797         /* Allow interrupts for other ULPs running on NIC function */
4798         be_intr_set(adapter, true);
4799
4800         return 0;
4801 }
4802
4803 static int be_setup(struct be_adapter *adapter)
4804 {
4805         struct device *dev = &adapter->pdev->dev;
4806         int status;
4807
4808         status = be_func_init(adapter);
4809         if (status)
4810                 return status;
4811
4812         be_setup_init(adapter);
4813
4814         if (!lancer_chip(adapter))
4815                 be_cmd_req_native_mode(adapter);
4816
4817         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4818          * for issuing profile-related cmds.
4819          */
4820         if (!BEx_chip(adapter)) {
4821                 status = be_cmd_get_func_config(adapter, NULL);
4822                 if (status)
4823                         return status;
4824         }
4825
4826         status = be_get_config(adapter);
4827         if (status)
4828                 goto err;
4829
4830         if (!BE2_chip(adapter) && be_physfn(adapter))
4831                 be_alloc_sriov_res(adapter);
4832
4833         status = be_get_resources(adapter);
4834         if (status)
4835                 goto err;
4836
4837         status = be_msix_enable(adapter);
4838         if (status)
4839                 goto err;
4840
4841         /* will enable all the needed filter flags in be_open() */
4842         status = be_if_create(adapter);
4843         if (status)
4844                 goto err;
4845
4846         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4847         rtnl_lock();
4848         status = be_setup_queues(adapter);
4849         rtnl_unlock();
4850         if (status)
4851                 goto err;
4852
4853         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4854
4855         status = be_mac_setup(adapter);
4856         if (status)
4857                 goto err;
4858
4859         be_cmd_get_fw_ver(adapter);
4860         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4861
4862         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4863                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work",
4864                         adapter->fw_ver);
4865                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4866         }
4867
4868         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4869                                          adapter->rx_fc);
4870         if (status)
4871                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4872                                         &adapter->rx_fc);
4873
4874         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4875                  adapter->tx_fc, adapter->rx_fc);
4876
4877         if (be_physfn(adapter))
4878                 be_cmd_set_logical_link_config(adapter,
4879                                                IFLA_VF_LINK_STATE_AUTO, 0);
4880
4881         /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4882          * confusing any Linux bridge or OVS that it might be connected to.
4883          * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4884          * when SRIOV is not enabled.
4885          */
4886         if (BE3_chip(adapter))
4887                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4888                                       PORT_FWD_TYPE_PASSTHRU, 0);
4889
4890         if (adapter->num_vfs)
4891                 be_vf_setup(adapter);
4892
4893         status = be_cmd_get_phy_info(adapter);
4894         if (!status && be_pause_supported(adapter))
4895                 adapter->phy.fc_autoneg = 1;
4896
4897         if (be_physfn(adapter) && !lancer_chip(adapter))
4898                 be_cmd_set_features(adapter);
4899
4900         be_schedule_worker(adapter);
4901         adapter->flags |= BE_FLAGS_SETUP_DONE;
4902         return 0;
4903 err:
4904         be_clear(adapter);
4905         return status;
4906 }
4907
4908 #ifdef CONFIG_NET_POLL_CONTROLLER
4909 static void be_netpoll(struct net_device *netdev)
4910 {
4911         struct be_adapter *adapter = netdev_priv(netdev);
4912         struct be_eq_obj *eqo;
4913         int i;
4914
4915         for_all_evt_queues(adapter, eqo, i) {
4916                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4917                 napi_schedule(&eqo->napi);
4918         }
4919 }
4920 #endif
4921
4922 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4923 {
4924         const struct firmware *fw;
4925         int status;
4926
4927         if (!netif_running(adapter->netdev)) {
4928                 dev_err(&adapter->pdev->dev,
4929                         "Firmware load not allowed (interface is down)\n");
4930                 return -ENETDOWN;
4931         }
4932
4933         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4934         if (status)
4935                 goto fw_exit;
4936
4937         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4938
4939         if (lancer_chip(adapter))
4940                 status = lancer_fw_download(adapter, fw);
4941         else
4942                 status = be_fw_download(adapter, fw);
4943
4944         if (!status)
4945                 be_cmd_get_fw_ver(adapter);
4946
4947 fw_exit:
4948         release_firmware(fw);
4949         return status;
4950 }
4951
4952 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4953                                  u16 flags)
4954 {
4955         struct be_adapter *adapter = netdev_priv(dev);
4956         struct nlattr *attr, *br_spec;
4957         int rem;
4958         int status = 0;
4959         u16 mode = 0;
4960
4961         if (!sriov_enabled(adapter))
4962                 return -EOPNOTSUPP;
4963
4964         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4965         if (!br_spec)
4966                 return -EINVAL;
4967
4968         nla_for_each_nested(attr, br_spec, rem) {
4969                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4970                         continue;
4971
4972                 if (nla_len(attr) < sizeof(mode))
4973                         return -EINVAL;
4974
4975                 mode = nla_get_u16(attr);
4976                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4977                         return -EOPNOTSUPP;
4978
4979                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4980                         return -EINVAL;
4981
4982                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4983                                                adapter->if_handle,
4984                                                mode == BRIDGE_MODE_VEPA ?
4985                                                PORT_FWD_TYPE_VEPA :
4986                                                PORT_FWD_TYPE_VEB, 0);
4987                 if (status)
4988                         goto err;
4989
4990                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4991                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4992
4993                 return status;
4994         }
4995 err:
4996         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4997                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4998
4999         return status;
5000 }
5001
5002 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5003                                  struct net_device *dev, u32 filter_mask,
5004                                  int nlflags)
5005 {
5006         struct be_adapter *adapter = netdev_priv(dev);
5007         int status = 0;
5008         u8 hsw_mode;
5009
5010         /* BE and Lancer chips support VEB mode only */
5011         if (BEx_chip(adapter) || lancer_chip(adapter)) {
5012                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5013                 if (!pci_sriov_get_totalvfs(adapter->pdev))
5014                         return 0;
5015                 hsw_mode = PORT_FWD_TYPE_VEB;
5016         } else {
5017                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5018                                                adapter->if_handle, &hsw_mode,
5019                                                NULL);
5020                 if (status)
5021                         return 0;
5022
5023                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5024                         return 0;
5025         }
5026
5027         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5028                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5029                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5030                                        0, 0, nlflags, filter_mask, NULL);
5031 }
5032
5033 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5034                                          void (*func)(struct work_struct *))
5035 {
5036         struct be_cmd_work *work;
5037
5038         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5039         if (!work) {
5040                 dev_err(&adapter->pdev->dev,
5041                         "be_work memory allocation failed\n");
5042                 return NULL;
5043         }
5044
5045         INIT_WORK(&work->work, func);
5046         work->adapter = adapter;
5047         return work;
5048 }
5049
5050 /* VxLAN offload Notes:
5051  *
5052  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5053  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5054  * is expected to work across all types of IP tunnels once exported. Skyhawk
5055  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5056  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5057  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5058  * those other tunnels are unexported on the fly through ndo_features_check().
5059  *
5060  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5061  * adds more than one port, disable offloads and re-enable them again when
5062  * there's only one port left. We maintain a list of ports for this purpose.
5063  */
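/* Summary of the add/del handlers below: adding an already-known UDP port
 * only bumps its alias count; adding a second distinct port disables the
 * offloads, which are re-enabled once a single port is left again.
 */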
5064 static void be_work_add_vxlan_port(struct work_struct *work)
5065 {
5066         struct be_cmd_work *cmd_work =
5067                                 container_of(work, struct be_cmd_work, work);
5068         struct be_adapter *adapter = cmd_work->adapter;
5069         struct device *dev = &adapter->pdev->dev;
5070         __be16 port = cmd_work->info.vxlan_port;
5071         struct be_vxlan_port *vxlan_port;
5072         int status;
5073
5074         /* Bump up the alias count if it is an existing port */
5075         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5076                 if (vxlan_port->port == port) {
5077                         vxlan_port->port_aliases++;
5078                         goto done;
5079                 }
5080         }
5081
5082         /* Add a new port to our list. We don't need a lock here since port
5083          * add/delete are done only in the context of a single-threaded work
5084          * queue (be_wq).
5085          */
5086         vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5087         if (!vxlan_port)
5088                 goto done;
5089
5090         vxlan_port->port = port;
5091         INIT_LIST_HEAD(&vxlan_port->list);
5092         list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5093         adapter->vxlan_port_count++;
5094
5095         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5096                 dev_info(dev,
5097                          "Only one UDP port supported for VxLAN offloads\n");
5098                 dev_info(dev, "Disabling VxLAN offloads\n");
5099                 goto err;
5100         }
5101
5102         if (adapter->vxlan_port_count > 1)
5103                 goto done;
5104
5105         status = be_enable_vxlan_offloads(adapter);
5106         if (!status)
5107                 goto done;
5108
5109 err:
5110         be_disable_vxlan_offloads(adapter);
5111 done:
5112         kfree(cmd_work);
5113         return;
5114 }
5115
5116 static void be_work_del_vxlan_port(struct work_struct *work)
5117 {
5118         struct be_cmd_work *cmd_work =
5119                                 container_of(work, struct be_cmd_work, work);
5120         struct be_adapter *adapter = cmd_work->adapter;
5121         __be16 port = cmd_work->info.vxlan_port;
5122         struct be_vxlan_port *vxlan_port;
5123
5124         /* Nothing to be done if a port alias is being deleted */
5125         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5126                 if (vxlan_port->port == port) {
5127                         if (vxlan_port->port_aliases) {
5128                                 vxlan_port->port_aliases--;
5129                                 goto done;
5130                         }
5131                         break;
5132                 }
5133         }
5134
5135         /* No port aliases left; delete the port from the list */
5136         list_del(&vxlan_port->list);
5137         adapter->vxlan_port_count--;
5138
5139         /* Disable VxLAN offload if this is the offloaded port */
5140         if (adapter->vxlan_port == vxlan_port->port) {
5141                 WARN_ON(adapter->vxlan_port_count);
5142                 be_disable_vxlan_offloads(adapter);
5143                 dev_info(&adapter->pdev->dev,
5144                          "Disabled VxLAN offloads for UDP port %d\n",
5145                          be16_to_cpu(port));
5146                 goto out;
5147         }
5148
5149         /* If only 1 port is left, re-enable VxLAN offload */
5150         if (adapter->vxlan_port_count == 1)
5151                 be_enable_vxlan_offloads(adapter);
5152
5153 out:
5154         kfree(vxlan_port);
5155 done:
5156         kfree(cmd_work);
5157 }
5158
5159 static void be_cfg_vxlan_port(struct net_device *netdev,
5160                               struct udp_tunnel_info *ti,
5161                               void (*func)(struct work_struct *))
5162 {
5163         struct be_adapter *adapter = netdev_priv(netdev);
5164         struct be_cmd_work *cmd_work;
5165
5166         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5167                 return;
5168
5169         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5170                 return;
5171
5172         cmd_work = be_alloc_work(adapter, func);
5173         if (cmd_work) {
5174                 cmd_work->info.vxlan_port = ti->port;
5175                 queue_work(be_wq, &cmd_work->work);
5176         }
5177 }
5178
5179 static void be_del_vxlan_port(struct net_device *netdev,
5180                               struct udp_tunnel_info *ti)
5181 {
5182         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5183 }
5184
5185 static void be_add_vxlan_port(struct net_device *netdev,
5186                               struct udp_tunnel_info *ti)
5187 {
5188         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5189 }
5190
5191 static netdev_features_t be_features_check(struct sk_buff *skb,
5192                                            struct net_device *dev,
5193                                            netdev_features_t features)
5194 {
5195         struct be_adapter *adapter = netdev_priv(dev);
5196         u8 l4_hdr = 0;
5197
5198         if (skb_is_gso(skb)) {
5199                 /* IPv6 TSO requests with extension hdrs are a problem
5200                  * for Lancer and BE3 HW. Disable the TSO6 feature.
5201                  */
5202                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5203                         features &= ~NETIF_F_TSO6;
5204
5205                 /* Lancer cannot handle packets with an MSS less than 256.
5206                  * It also can't handle a TSO packet with a single segment.
5207                  * Disable GSO support in such cases.
5208                  */
5209                 if (lancer_chip(adapter) &&
5210                     (skb_shinfo(skb)->gso_size < 256 ||
5211                      skb_shinfo(skb)->gso_segs == 1))
5212                         features &= ~NETIF_F_GSO_MASK;
5213         }
5214
5215         /* The code below restricts offload features for some tunneled and
5216          * Q-in-Q packets.
5217          * Offload features for normal (non tunnel) packets are unchanged.
5218          */
5219         features = vlan_features_check(skb, features);
5220         if (!skb->encapsulation ||
5221             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5222                 return features;
5223
5224         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5225          * should disable tunnel offload features if it's not a VxLAN packet,
5226          * as tunnel offloads have been enabled only for VxLAN. This is done to
5227                  * allow other tunneled traffic like GRE to work fine while VxLAN
5228          * offloads are configured in Skyhawk-R.
5229          */
5230         switch (vlan_get_protocol(skb)) {
5231         case htons(ETH_P_IP):
5232                 l4_hdr = ip_hdr(skb)->protocol;
5233                 break;
5234         case htons(ETH_P_IPV6):
5235                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5236                 break;
5237         default:
5238                 return features;
5239         }
5240
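        /* Keep the offloads only for UDP-encapsulated Ethernet (TEB) frames
         * whose outer headers are exactly UDP + VxLAN and whose destination
         * port is the offloaded VxLAN port; otherwise strip csum/GSO offloads.
         */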
5241         if (l4_hdr != IPPROTO_UDP ||
5242             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5243             skb->inner_protocol != htons(ETH_P_TEB) ||
5244             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5245                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5246             !adapter->vxlan_port ||
5247             udp_hdr(skb)->dest != adapter->vxlan_port)
5248                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5249
5250         return features;
5251 }
5252
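/* Compose the physical port ID: byte 0 is the 1-based HBA port number,
 * followed by the controller serial-number words copied in reverse order.
 */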
5253 static int be_get_phys_port_id(struct net_device *dev,
5254                                struct netdev_phys_item_id *ppid)
5255 {
5256         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5257         struct be_adapter *adapter = netdev_priv(dev);
5258         u8 *id;
5259
5260         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5261                 return -ENOSPC;
5262
5263         ppid->id[0] = adapter->hba_port_num + 1;
5264         id = &ppid->id[1];
5265         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5266              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5267                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5268
5269         ppid->id_len = id_len;
5270
5271         return 0;
5272 }
5273
5274 static void be_set_rx_mode(struct net_device *dev)
5275 {
5276         struct be_adapter *adapter = netdev_priv(dev);
5277         struct be_cmd_work *work;
5278
5279         work = be_alloc_work(adapter, be_work_set_rx_mode);
5280         if (work)
5281                 queue_work(be_wq, &work->work);
5282 }
5283
5284 static const struct net_device_ops be_netdev_ops = {
5285         .ndo_open               = be_open,
5286         .ndo_stop               = be_close,
5287         .ndo_start_xmit         = be_xmit,
5288         .ndo_set_rx_mode        = be_set_rx_mode,
5289         .ndo_set_mac_address    = be_mac_addr_set,
5290         .ndo_get_stats64        = be_get_stats64,
5291         .ndo_validate_addr      = eth_validate_addr,
5292         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5293         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5294         .ndo_set_vf_mac         = be_set_vf_mac,
5295         .ndo_set_vf_vlan        = be_set_vf_vlan,
5296         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5297         .ndo_get_vf_config      = be_get_vf_config,
5298         .ndo_set_vf_link_state  = be_set_vf_link_state,
5299         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5300         .ndo_tx_timeout         = be_tx_timeout,
5301 #ifdef CONFIG_NET_POLL_CONTROLLER
5302         .ndo_poll_controller    = be_netpoll,
5303 #endif
5304         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5305         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5306         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5307         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5308         .ndo_features_check     = be_features_check,
5309         .ndo_get_phys_port_id   = be_get_phys_port_id,
5310 };
5311
5312 static void be_netdev_init(struct net_device *netdev)
5313 {
5314         struct be_adapter *adapter = netdev_priv(netdev);
5315
5316         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5317                 NETIF_F_GSO_UDP_TUNNEL |
5318                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5319                 NETIF_F_HW_VLAN_CTAG_TX;
5320         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5321                 netdev->hw_features |= NETIF_F_RXHASH;
5322
5323         netdev->features |= netdev->hw_features |
5324                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5325
5326         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5327                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5328
5329         netdev->priv_flags |= IFF_UNICAST_FLT;
5330
5331         netdev->flags |= IFF_MULTICAST;
5332
5333         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5334
5335         netdev->netdev_ops = &be_netdev_ops;
5336
5337         netdev->ethtool_ops = &be_ethtool_ops;
5338
5339         /* MTU range: 256 - 9000 */
5340         netdev->min_mtu = BE_MIN_MTU;
5341         netdev->max_mtu = BE_MAX_MTU;
5342 }
5343
5344 static void be_cleanup(struct be_adapter *adapter)
5345 {
5346         struct net_device *netdev = adapter->netdev;
5347
5348         rtnl_lock();
5349         netif_device_detach(netdev);
5350         if (netif_running(netdev))
5351                 be_close(netdev);
5352         rtnl_unlock();
5353
5354         be_clear(adapter);
5355 }
5356
5357 static int be_resume(struct be_adapter *adapter)
5358 {
5359         struct net_device *netdev = adapter->netdev;
5360         int status;
5361
5362         status = be_setup(adapter);
5363         if (status)
5364                 return status;
5365
5366         rtnl_lock();
5367         if (netif_running(netdev))
5368                 status = be_open(netdev);
5369         rtnl_unlock();
5370
5371         if (status)
5372                 return status;
5373
5374         netif_device_attach(netdev);
5375
5376         return 0;
5377 }
5378
5379 static void be_soft_reset(struct be_adapter *adapter)
5380 {
5381         u32 val;
5382
5383         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5384         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5385         val |= SLIPORT_SOFTRESET_SR_MASK;
5386         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5387 }
5388
5389 static bool be_err_is_recoverable(struct be_adapter *adapter)
5390 {
5391         struct be_error_recovery *err_rec = &adapter->error_recovery;
5392         unsigned long initial_idle_time =
5393                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5394         unsigned long recovery_interval =
5395                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5396         u16 ue_err_code;
5397         u32 val;
5398
5399         val = be_POST_stage_get(adapter);
5400         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5401                 return false;
5402         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5403         if (ue_err_code == 0)
5404                 return false;
5405
5406         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5407                 ue_err_code);
5408
5409         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5410                 dev_err(&adapter->pdev->dev,
5411                         "Cannot recover within %lu sec from driver load\n",
5412                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5413                 return false;
5414         }
5415
5416         if (err_rec->last_recovery_time && time_before_eq(
5417                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5418                 dev_err(&adapter->pdev->dev,
5419                         "Cannot recover within %lu sec from last recovery\n",
5420                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5421                 return false;
5422         }
5423
5424         if (ue_err_code == err_rec->last_err_code) {
5425                 dev_err(&adapter->pdev->dev,
5426                         "Cannot recover from a consecutive TPE error\n");
5427                 return false;
5428         }
5429
5430         err_rec->last_recovery_time = jiffies;
5431         err_rec->last_err_code = ue_err_code;
5432         return true;
5433 }
5434
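/* TPE recovery state machine, as implemented below:
 * NONE -> DETECT -> RESET (PF0 only) -> PRE_POLL -> REINIT.
 * Each state sets resched_delay so that the error-detection task re-invokes
 * this function after the required wait; -EAGAIN means "call again later".
 */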
5435 static int be_tpe_recover(struct be_adapter *adapter)
5436 {
5437         struct be_error_recovery *err_rec = &adapter->error_recovery;
5438         int status = -EAGAIN;
5439         u32 val;
5440
5441         switch (err_rec->recovery_state) {
5442         case ERR_RECOVERY_ST_NONE:
5443                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5444                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5445                 break;
5446
5447         case ERR_RECOVERY_ST_DETECT:
5448                 val = be_POST_stage_get(adapter);
5449                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5450                     POST_STAGE_RECOVERABLE_ERR) {
5451                         dev_err(&adapter->pdev->dev,
5452                                 "Unrecoverable HW error detected: 0x%x\n", val);
5453                         status = -EINVAL;
5454                         err_rec->resched_delay = 0;
5455                         break;
5456                 }
5457
5458                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5459
5460                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5461                  * milliseconds before it checks for final error status in
5462                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5463                  * If they are, PF0 initiates a Soft Reset.
5464                  */
5465                 if (adapter->pf_num == 0) {
5466                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5467                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5468                                         ERR_RECOVERY_UE_DETECT_DURATION;
5469                         break;
5470                 }
5471
5472                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5473                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5474                                         ERR_RECOVERY_UE_DETECT_DURATION;
5475                 break;
5476
5477         case ERR_RECOVERY_ST_RESET:
5478                 if (!be_err_is_recoverable(adapter)) {
5479                         dev_err(&adapter->pdev->dev,
5480                                 "Failed to meet recovery criteria\n");
5481                         status = -EIO;
5482                         err_rec->resched_delay = 0;
5483                         break;
5484                 }
5485                 be_soft_reset(adapter);
5486                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5487                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5488                                         err_rec->ue_to_reset_time;
5489                 break;
5490
5491         case ERR_RECOVERY_ST_PRE_POLL:
5492                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5493                 err_rec->resched_delay = 0;
5494                 status = 0;                     /* done */
5495                 break;
5496
5497         default:
5498                 status = -EINVAL;
5499                 err_rec->resched_delay = 0;
5500                 break;
5501         }
5502
5503         return status;
5504 }
5505
5506 static int be_err_recover(struct be_adapter *adapter)
5507 {
5508         int status;
5509
5510         if (!lancer_chip(adapter)) {
5511                 if (!adapter->error_recovery.recovery_supported ||
5512                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5513                         return -EIO;
5514                 status = be_tpe_recover(adapter);
5515                 if (status)
5516                         goto err;
5517         }
5518
5519         /* Wait for adapter to reach quiescent state before
5520          * destroying queues
5521          */
5522         status = be_fw_wait_ready(adapter);
5523         if (status)
5524                 goto err;
5525
5526         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5527
5528         be_cleanup(adapter);
5529
5530         status = be_resume(adapter);
5531         if (status)
5532                 goto err;
5533
5534         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5535
5536 err:
5537         return status;
5538 }
5539
5540 static void be_err_detection_task(struct work_struct *work)
5541 {
5542         struct be_error_recovery *err_rec =
5543                         container_of(work, struct be_error_recovery,
5544                                      err_detection_work.work);
5545         struct be_adapter *adapter =
5546                         container_of(err_rec, struct be_adapter,
5547                                      error_recovery);
5548         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5549         struct device *dev = &adapter->pdev->dev;
5550         int recovery_status;
5551
5552         be_detect_error(adapter);
5553         if (!be_check_error(adapter, BE_ERROR_HW))
5554                 goto reschedule_task;
5555
5556         recovery_status = be_err_recover(adapter);
5557         if (!recovery_status) {
5558                 err_rec->recovery_retries = 0;
5559                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5560                 dev_info(dev, "Adapter recovery successful\n");
5561                 goto reschedule_task;
5562         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5563                 /* BEx/SH recovery state machine */
5564                 if (adapter->pf_num == 0 &&
5565                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5566                         dev_err(&adapter->pdev->dev,
5567                                 "Adapter recovery in progress\n");
5568                 resched_delay = err_rec->resched_delay;
5569                 goto reschedule_task;
5570         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5571                 /* For VFs, check every second whether the PF has
5572                  * allocated resources.
5573                  */
5574                 dev_err(dev, "Re-trying adapter recovery\n");
5575                 goto reschedule_task;
5576         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5577                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5578                 /* If another error occurs during recovery, it takes 30 sec
5579                  * for the adapter to come out of the error state. Retry error
5580                  * recovery after this interval.
5581                  */
5582                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5583                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5584                 goto reschedule_task;
5585         } else {
5586                 dev_err(dev, "Adapter recovery failed\n");
5587                 dev_err(dev, "Please reboot server to recover\n");
5588         }
5589
5590         return;
5591
5592 reschedule_task:
5593         be_schedule_err_detection(adapter, resched_delay);
5594 }
5595
5596 static void be_log_sfp_info(struct be_adapter *adapter)
5597 {
5598         int status;
5599
5600         status = be_cmd_query_sfp_info(adapter);
5601         if (!status) {
5602                 dev_err(&adapter->pdev->dev,
5603                         "Port %c: %s Vendor: %s part no: %s",
5604                         adapter->port_name,
5605                         be_misconfig_evt_port_state[adapter->phy_state],
5606                         adapter->phy.vendor_name,
5607                         adapter->phy.vendor_pn);
5608         }
5609         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5610 }
5611
5612 static void be_worker(struct work_struct *work)
5613 {
5614         struct be_adapter *adapter =
5615                 container_of(work, struct be_adapter, work.work);
5616         struct be_rx_obj *rxo;
5617         int i;
5618
5619         if (be_physfn(adapter) &&
5620             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5621                 be_cmd_get_die_temperature(adapter);
5622
5623         /* when interrupts are not yet enabled, just reap any pending
5624          * mcc completions
5625          */
5626         if (!netif_running(adapter->netdev)) {
5627                 local_bh_disable();
5628                 be_process_mcc(adapter);
5629                 local_bh_enable();
5630                 goto reschedule;
5631         }
5632
5633         if (!adapter->stats_cmd_sent) {
5634                 if (lancer_chip(adapter))
5635                         lancer_cmd_get_pport_stats(adapter,
5636                                                    &adapter->stats_cmd);
5637                 else
5638                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5639         }
5640
5641         for_all_rx_queues(adapter, rxo, i) {
5642                 /* Replenish RX-queues starved due to memory
5643                  * allocation failures.
5644                  */
5645                 if (rxo->rx_post_starved)
5646                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5647         }
5648
5649         /* EQ-delay update for Skyhawk is done while notifying EQ */
5650         if (!skyhawk_chip(adapter))
5651                 be_eqd_update(adapter, false);
5652
5653         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5654                 be_log_sfp_info(adapter);
5655
5656 reschedule:
5657         adapter->work_counter++;
5658         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5659 }
5660
5661 static void be_unmap_pci_bars(struct be_adapter *adapter)
5662 {
5663         if (adapter->csr)
5664                 pci_iounmap(adapter->pdev, adapter->csr);
5665         if (adapter->db)
5666                 pci_iounmap(adapter->pdev, adapter->db);
5667         if (adapter->pcicfg && adapter->pcicfg_mapped)
5668                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5669 }
5670
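/* Doorbell BAR: Lancer chips and VFs expose doorbells in BAR 0; the other
 * (BE2/BE3/Skyhawk) PFs use BAR 4.
 */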
5671 static int db_bar(struct be_adapter *adapter)
5672 {
5673         if (lancer_chip(adapter) || be_virtfn(adapter))
5674                 return 0;
5675         else
5676                 return 4;
5677 }
5678
5679 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5680 {
5681         if (skyhawk_chip(adapter)) {
5682                 adapter->roce_db.size = 4096;
5683                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5684                                                               db_bar(adapter));
5685                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5686                                                                db_bar(adapter));
5687         }
5688         return 0;
5689 }
5690
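/* Map the PCI BARs used by the driver: CSR (BEx PFs only), the doorbell BAR
 * and, where applicable, the PCICFG region; RoCE doorbell details are
 * recorded for Skyhawk chips.
 */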
5691 static int be_map_pci_bars(struct be_adapter *adapter)
5692 {
5693         struct pci_dev *pdev = adapter->pdev;
5694         u8 __iomem *addr;
5695         u32 sli_intf;
5696
5697         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5698         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5699                                 SLI_INTF_FAMILY_SHIFT;
5700         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5701
5702         if (BEx_chip(adapter) && be_physfn(adapter)) {
5703                 adapter->csr = pci_iomap(pdev, 2, 0);
5704                 if (!adapter->csr)
5705                         return -ENOMEM;
5706         }
5707
5708         addr = pci_iomap(pdev, db_bar(adapter), 0);
5709         if (!addr)
5710                 goto pci_map_err;
5711         adapter->db = addr;
5712
5713         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5714                 if (be_physfn(adapter)) {
5715                         /* PCICFG is the 2nd BAR in BE2 */
5716                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5717                         if (!addr)
5718                                 goto pci_map_err;
5719                         adapter->pcicfg = addr;
5720                         adapter->pcicfg_mapped = true;
5721                 } else {
5722                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5723                         adapter->pcicfg_mapped = false;
5724                 }
5725         }
5726
5727         be_roce_map_pci_bars(adapter);
5728         return 0;
5729
5730 pci_map_err:
5731         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5732         be_unmap_pci_bars(adapter);
5733         return -ENOMEM;
5734 }
5735
5736 static void be_drv_cleanup(struct be_adapter *adapter)
5737 {
5738         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5739         struct device *dev = &adapter->pdev->dev;
5740
5741         if (mem->va)
5742                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5743
5744         mem = &adapter->rx_filter;
5745         if (mem->va)
5746                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5747
5748         mem = &adapter->stats_cmd;
5749         if (mem->va)
5750                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5751 }
5752
5753 /* Allocate and initialize various fields in be_adapter struct */
5754 static int be_drv_init(struct be_adapter *adapter)
5755 {
5756         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5757         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5758         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5759         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5760         struct device *dev = &adapter->pdev->dev;
5761         int status = 0;
5762
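        /* The FW mailbox must be 16-byte aligned: allocate 16 extra bytes
         * and derive an aligned view of the same buffer below.
         */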
5763         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5764         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5765                                                  &mbox_mem_alloc->dma,
5766                                                  GFP_KERNEL);
5767         if (!mbox_mem_alloc->va)
5768                 return -ENOMEM;
5769
5770         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5771         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5772         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5773
5774         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5775         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5776                                             &rx_filter->dma, GFP_KERNEL);
5777         if (!rx_filter->va) {
5778                 status = -ENOMEM;
5779                 goto free_mbox;
5780         }
5781
5782         if (lancer_chip(adapter))
5783                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5784         else if (BE2_chip(adapter))
5785                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5786         else if (BE3_chip(adapter))
5787                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5788         else
5789                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5790         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5791                                             &stats_cmd->dma, GFP_KERNEL);
5792         if (!stats_cmd->va) {
5793                 status = -ENOMEM;
5794                 goto free_rx_filter;
5795         }
5796
5797         mutex_init(&adapter->mbox_lock);
5798         mutex_init(&adapter->mcc_lock);
5799         mutex_init(&adapter->rx_filter_lock);
5800         spin_lock_init(&adapter->mcc_cq_lock);
5801         init_completion(&adapter->et_cmd_compl);
5802
5803         pci_save_state(adapter->pdev);
5804
5805         INIT_DELAYED_WORK(&adapter->work, be_worker);
5806
5807         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5808         adapter->error_recovery.resched_delay = 0;
5809         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5810                           be_err_detection_task);
5811
5812         adapter->rx_fc = true;
5813         adapter->tx_fc = true;
5814
5815         /* Must be a power of 2 or else MODULO will BUG_ON */
5816         adapter->be_get_temp_freq = 64;
5817
5818         INIT_LIST_HEAD(&adapter->vxlan_port_list);
5819         return 0;
5820
5821 free_rx_filter:
5822         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5823 free_mbox:
5824         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5825                           mbox_mem_alloc->dma);
5826         return status;
5827 }
5828
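/* PCI remove: detach RoCE, stop error detection, unregister the netdev and
 * release the FW, BAR and DMA resources acquired in be_probe().
 */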
5829 static void be_remove(struct pci_dev *pdev)
5830 {
5831         struct be_adapter *adapter = pci_get_drvdata(pdev);
5832
5833         if (!adapter)
5834                 return;
5835
5836         be_roce_dev_remove(adapter);
5837         be_intr_set(adapter, false);
5838
5839         be_cancel_err_detection(adapter);
5840
5841         unregister_netdev(adapter->netdev);
5842
5843         be_clear(adapter);
5844
5845         if (!pci_vfs_assigned(adapter->pdev))
5846                 be_cmd_reset_function(adapter);
5847
5848         /* Tell FW we are done issuing commands */
5849         be_cmd_fw_clean(adapter);
5850
5851         be_unmap_pci_bars(adapter);
5852         be_drv_cleanup(adapter);
5853
5854         pci_disable_pcie_error_reporting(pdev);
5855
5856         pci_release_regions(pdev);
5857         pci_disable_device(pdev);
5858
5859         free_netdev(adapter->netdev);
5860 }
5861
5862 static ssize_t be_hwmon_show_temp(struct device *dev,
5863                                   struct device_attribute *dev_attr,
5864                                   char *buf)
5865 {
5866         struct be_adapter *adapter = dev_get_drvdata(dev);
5867
5868         /* Unit: millidegree Celsius */
5869         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5870                 return -EIO;
5871         else
5872                 return sprintf(buf, "%u\n",
5873                                adapter->hwmon_info.be_on_die_temp * 1000);
5874 }
5875
5876 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5877                           be_hwmon_show_temp, NULL, 1);
5878
5879 static struct attribute *be_hwmon_attrs[] = {
5880         &sensor_dev_attr_temp1_input.dev_attr.attr,
5881         NULL
5882 };
5883
5884 ATTRIBUTE_GROUPS(be_hwmon);
5885
5886 static char *mc_name(struct be_adapter *adapter)
5887 {
5888         char *str = ""; /* default */
5889
5890         switch (adapter->mc_type) {
5891         case UMC:
5892                 str = "UMC";
5893                 break;
5894         case FLEX10:
5895                 str = "FLEX10";
5896                 break;
5897         case vNIC1:
5898                 str = "vNIC-1";
5899                 break;
5900         case nPAR:
5901                 str = "nPAR";
5902                 break;
5903         case UFP:
5904                 str = "UFP";
5905                 break;
5906         case vNIC2:
5907                 str = "vNIC-2";
5908                 break;
5909         default:
5910                 str = "";
5911         }
5912
5913         return str;
5914 }
5915
5916 static inline char *func_name(struct be_adapter *adapter)
5917 {
5918         return be_physfn(adapter) ? "PF" : "VF";
5919 }
5920
5921 static inline char *nic_name(struct pci_dev *pdev)
5922 {
5923         switch (pdev->device) {
5924         case OC_DEVICE_ID1:
5925                 return OC_NAME;
5926         case OC_DEVICE_ID2:
5927                 return OC_NAME_BE;
5928         case OC_DEVICE_ID3:
5929         case OC_DEVICE_ID4:
5930                 return OC_NAME_LANCER;
5931         case BE_DEVICE_ID2:
5932                 return BE3_NAME;
5933         case OC_DEVICE_ID5:
5934         case OC_DEVICE_ID6:
5935                 return OC_NAME_SH;
5936         default:
5937                 return BE_NAME;
5938         }
5939 }
5940
5941 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5942 {
5943         struct be_adapter *adapter;
5944         struct net_device *netdev;
5945         int status = 0;
5946
5947         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5948
5949         status = pci_enable_device(pdev);
5950         if (status)
5951                 goto do_none;
5952
5953         status = pci_request_regions(pdev, DRV_NAME);
5954         if (status)
5955                 goto disable_dev;
5956         pci_set_master(pdev);
5957
5958         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5959         if (!netdev) {
5960                 status = -ENOMEM;
5961                 goto rel_reg;
5962         }
5963         adapter = netdev_priv(netdev);
5964         adapter->pdev = pdev;
5965         pci_set_drvdata(pdev, adapter);
5966         adapter->netdev = netdev;
5967         SET_NETDEV_DEV(netdev, &pdev->dev);
5968
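        /* Prefer a 64-bit DMA/coherent mask; fall back to a 32-bit mask if
         * the platform cannot support the larger one.
         */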
5969         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5970         if (!status) {
5971                 netdev->features |= NETIF_F_HIGHDMA;
5972         } else {
5973                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5974                 if (status) {
5975                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5976                         goto free_netdev;
5977                 }
5978         }
5979
5980         status = pci_enable_pcie_error_reporting(pdev);
5981         if (!status)
5982                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5983
5984         status = be_map_pci_bars(adapter);
5985         if (status)
5986                 goto free_netdev;
5987
5988         status = be_drv_init(adapter);
5989         if (status)
5990                 goto unmap_bars;
5991
5992         status = be_setup(adapter);
5993         if (status)
5994                 goto drv_cleanup;
5995
5996         be_netdev_init(netdev);
5997         status = register_netdev(netdev);
5998         if (status != 0)
5999                 goto unsetup;
6000
6001         be_roce_dev_add(adapter);
6002
6003         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6004         adapter->error_recovery.probe_time = jiffies;
6005
6006         /* On-die temperature is not supported for VFs. */
6007         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6008                 adapter->hwmon_info.hwmon_dev =
6009                         devm_hwmon_device_register_with_groups(&pdev->dev,
6010                                                                DRV_NAME,
6011                                                                adapter,
6012                                                                be_hwmon_groups);
6013                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6014         }
6015
6016         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6017                  func_name(adapter), mc_name(adapter), adapter->port_name);
6018
6019         return 0;
6020
6021 unsetup:
6022         be_clear(adapter);
6023 drv_cleanup:
6024         be_drv_cleanup(adapter);
6025 unmap_bars:
6026         be_unmap_pci_bars(adapter);
6027 free_netdev:
6028         free_netdev(netdev);
6029 rel_reg:
6030         pci_release_regions(pdev);
6031 disable_dev:
6032         pci_disable_device(pdev);
6033 do_none:
6034         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6035         return status;
6036 }
6037
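/* Legacy PCI power-management hooks: be_suspend() quiesces the adapter and
 * puts the device into the requested low-power state; be_pci_resume()
 * re-enables it and restarts error detection.
 */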
6038 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6039 {
6040         struct be_adapter *adapter = pci_get_drvdata(pdev);
6041
6042         be_intr_set(adapter, false);
6043         be_cancel_err_detection(adapter);
6044
6045         be_cleanup(adapter);
6046
6047         pci_save_state(pdev);
6048         pci_disable_device(pdev);
6049         pci_set_power_state(pdev, pci_choose_state(pdev, state));
6050         return 0;
6051 }
6052
6053 static int be_pci_resume(struct pci_dev *pdev)
6054 {
6055         struct be_adapter *adapter = pci_get_drvdata(pdev);
6056         int status = 0;
6057
6058         status = pci_enable_device(pdev);
6059         if (status)
6060                 return status;
6061
6062         pci_restore_state(pdev);
6063
6064         status = be_resume(adapter);
6065         if (status)
6066                 return status;
6067
6068         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6069
6070         return 0;
6071 }
6072
6073 /*
6074  * An FLR will stop BE from DMAing any data.
6075  */
6076 static void be_shutdown(struct pci_dev *pdev)
6077 {
6078         struct be_adapter *adapter = pci_get_drvdata(pdev);
6079
6080         if (!adapter)
6081                 return;
6082
6083         be_roce_dev_shutdown(adapter);
6084         cancel_delayed_work_sync(&adapter->work);
6085         be_cancel_err_detection(adapter);
6086
6087         netif_device_detach(adapter->netdev);
6088
6089         be_cmd_reset_function(adapter);
6090
6091         pci_disable_device(pdev);
6092 }
6093
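/* PCI/EEH error handlers: error_detected quiesces the adapter, slot_reset
 * re-enables the device and waits for FW readiness, and resume restores
 * normal operation and reschedules error detection.
 */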
6094 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6095                                             pci_channel_state_t state)
6096 {
6097         struct be_adapter *adapter = pci_get_drvdata(pdev);
6098
6099         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6100
6101         be_roce_dev_remove(adapter);
6102
6103         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6104                 be_set_error(adapter, BE_ERROR_EEH);
6105
6106                 be_cancel_err_detection(adapter);
6107
6108                 be_cleanup(adapter);
6109         }
6110
6111         if (state == pci_channel_io_perm_failure)
6112                 return PCI_ERS_RESULT_DISCONNECT;
6113
6114         pci_disable_device(pdev);
6115
6116         /* The error could cause the FW to trigger a flash debug dump.
6117          * Resetting the card while flash dump is in progress
6118          * can cause it not to recover; wait for it to finish.
6119          * Wait only for first function as it is needed only once per
6120          * adapter.
6121          */
6122         if (pdev->devfn == 0)
6123                 ssleep(30);
6124
6125         return PCI_ERS_RESULT_NEED_RESET;
6126 }
6127
6128 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6129 {
6130         struct be_adapter *adapter = pci_get_drvdata(pdev);
6131         int status;
6132
6133         dev_info(&adapter->pdev->dev, "EEH reset\n");
6134
6135         status = pci_enable_device(pdev);
6136         if (status)
6137                 return PCI_ERS_RESULT_DISCONNECT;
6138
6139         pci_set_master(pdev);
6140         pci_restore_state(pdev);
6141
6142         /* Check if card is ok and fw is ready */
6143         dev_info(&adapter->pdev->dev,
6144                  "Waiting for FW to be ready after EEH reset\n");
6145         status = be_fw_wait_ready(adapter);
6146         if (status)
6147                 return PCI_ERS_RESULT_DISCONNECT;
6148
6149         be_clear_error(adapter, BE_CLEAR_ALL);
6150         return PCI_ERS_RESULT_RECOVERED;
6151 }
6152
6153 static void be_eeh_resume(struct pci_dev *pdev)
6154 {
6155         int status = 0;
6156         struct be_adapter *adapter = pci_get_drvdata(pdev);
6157
6158         dev_info(&adapter->pdev->dev, "EEH resume\n");
6159
6160         pci_save_state(pdev);
6161
6162         status = be_resume(adapter);
6163         if (status)
6164                 goto err;
6165
6166         be_roce_dev_add(adapter);
6167
6168         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6169         return;
6170 err:
6171         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6172 }
6173
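/* sriov_configure handler (sysfs sriov_numvfs): enables or disables the
 * requested number of VFs and, on Skyhawk, redistributes PF-pool resources
 * across them before re-creating the queues.
 */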
6174 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6175 {
6176         struct be_adapter *adapter = pci_get_drvdata(pdev);
6177         struct be_resources vft_res = {0};
6178         int status;
6179
6180         if (!num_vfs)
6181                 be_vf_clear(adapter);
6182
6183         adapter->num_vfs = num_vfs;
6184
6185         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6186                 dev_warn(&pdev->dev,
6187                          "Cannot disable VFs while they are assigned\n");
6188                 return -EBUSY;
6189         }
6190
6191         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6192          * resources are distributed equally across the maximum number of VFs.
6193          * The user may request that only a subset of the maximum VFs be
6194          * enabled. Based on num_vfs, redistribute the resources across
6195          * num_vfs so that each VF gets a larger share of the resources.
6196          * This facility is not available in BE3 FW.
6197          * On Lancer chips this redistribution is done by the FW.
6198          */
6199         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6200                 be_calculate_vf_res(adapter, adapter->num_vfs,
6201                                     &vft_res);
6202                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6203                                                  adapter->num_vfs, &vft_res);
6204                 if (status)
6205                         dev_err(&pdev->dev,
6206                                 "Failed to optimize SR-IOV resources\n");
6207         }
6208
6209         status = be_get_resources(adapter);
6210         if (status)
6211                 return be_cmd_status(status);
6212
6213         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6214         rtnl_lock();
6215         status = be_update_queues(adapter);
6216         rtnl_unlock();
6217         if (status)
6218                 return be_cmd_status(status);
6219
6220         if (adapter->num_vfs)
6221                 status = be_vf_setup(adapter);
6222
6223         if (!status)
6224                 return adapter->num_vfs;
6225
6226         return 0;
6227 }
6228
6229 static const struct pci_error_handlers be_eeh_handlers = {
6230         .error_detected = be_eeh_err_detected,
6231         .slot_reset = be_eeh_reset,
6232         .resume = be_eeh_resume,
6233 };
6234
6235 static struct pci_driver be_driver = {
6236         .name = DRV_NAME,
6237         .id_table = be_dev_ids,
6238         .probe = be_probe,
6239         .remove = be_remove,
6240         .suspend = be_suspend,
6241         .resume = be_pci_resume,
6242         .shutdown = be_shutdown,
6243         .sriov_configure = be_pci_sriov_configure,
6244         .err_handler = &be_eeh_handlers
6245 };
6246
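/* Module init: validate module parameters, create the shared command and
 * error-recovery workqueues and register the PCI driver.
 */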
6247 static int __init be_init_module(void)
6248 {
6249         int status;
6250
6251         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6252             rx_frag_size != 2048) {
6253                 pr_warn(DRV_NAME
6254                         " : Module param rx_frag_size must be 2048/4096/8192."
6255                         " Using 2048\n");
6256                 rx_frag_size = 2048;
6257         }
6258
6259         if (num_vfs > 0) {
6260                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.");
6261                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6262         }
6263
6264         be_wq = create_singlethread_workqueue("be_wq");
6265         if (!be_wq) {
6266                 pr_warn(DRV_NAME "workqueue creation failed\n");
6267                 return -1;
6268         }
6269
6270         be_err_recovery_workq =
6271                 create_singlethread_workqueue("be_err_recover");
6272         if (!be_err_recovery_workq)
6273                 pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
6274
6275         status = pci_register_driver(&be_driver);
6276         if (status) {
6277                 destroy_workqueue(be_wq);
6278                 be_destroy_err_recovery_workq();
6279         }
6280         return status;
6281 }
6282 module_init(be_init_module);
6283
6284 static void __exit be_exit_module(void)
6285 {
6286         pci_unregister_driver(&be_driver);
6287
6288         be_destroy_err_recovery_workq();
6289
6290         if (be_wq)
6291                 destroy_workqueue(be_wq);
6292 }
6293 module_exit(be_exit_module);