/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2012 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#include <linux/prefetch.h>
#include <linux/pm_runtime.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
	__stringify(BUILD) "-k"
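/* Editor's illustration (not driver code): __stringify() expands its macro
 * argument and quotes it, so with hypothetical values MAJ 4, MIN 0, BUILD 1
 * (the #define MAJ/MIN/BUILD lines were dropped from this excerpt)
 * DRV_VERSION becomes the literal string "4.0.1-k".
 */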
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
	"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *);
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
static int igb_vlan_rx_add_vid(struct net_device *, u16);
static int igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
static void igb_check_vf_rate_limit(struct igb_adapter *);
#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
static int igb_find_enabled_vfs(struct igb_adapter *adapter);
static int igb_check_vf_assignment(struct igb_adapter *adapter);
#endif

#ifdef CONFIG_PM
#ifdef CONFIG_PM_SLEEP
static int igb_suspend(struct device *);
#endif
static int igb_resume(struct device *);
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_suspend(struct device *dev);
static int igb_runtime_resume(struct device *dev);
static int igb_runtime_idle(struct device *dev);
#endif
static const struct dev_pm_ops igb_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
			igb_runtime_idle)
};
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
	.next		= NULL,
	.priority	= 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
		 "per physical function");
#endif /* CONFIG_PCI_IOV */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};

static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);

static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
	.remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
	.driver.pm = &igb_pm_ops,
#endif
	.shutdown = igb_shutdown,
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
struct igb_reg_info {
	u32 ofs;
	char *name;
};

static const struct igb_reg_info igb_reg_info_tbl[] = {

	/* General Registers */
	{E1000_CTRL, "CTRL"},
	{E1000_STATUS, "STATUS"},
	{E1000_CTRL_EXT, "CTRL_EXT"},

	/* Interrupt Registers */
	{E1000_ICR, "ICR"},

	/* RX Registers */
	{E1000_RCTL, "RCTL"},
	{E1000_RDLEN(0), "RDLEN"},
	{E1000_RDH(0), "RDH"},
	{E1000_RDT(0), "RDT"},
	{E1000_RXDCTL(0), "RXDCTL"},
	{E1000_RDBAL(0), "RDBAL"},
	{E1000_RDBAH(0), "RDBAH"},

	/* TX Registers */
	{E1000_TCTL, "TCTL"},
	{E1000_TDBAL(0), "TDBAL"},
	{E1000_TDBAH(0), "TDBAH"},
	{E1000_TDLEN(0), "TDLEN"},
	{E1000_TDH(0), "TDH"},
	{E1000_TDT(0), "TDT"},
	{E1000_TXDCTL(0), "TXDCTL"},
	{E1000_TDFH, "TDFH"},
	{E1000_TDFT, "TDFT"},
	{E1000_TDFHS, "TDFHS"},
	{E1000_TDFPC, "TDFPC"},

	/* List Terminator */
	{}
};
/*
 * igb_regdump - register printout routine
 */
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
		regs[2], regs[3]);
}
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 */
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
	u16 i, n;

	if (!netif_msg_hw(adapter))
		return;
	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
		pr_info("Device Name     state            trans_start      "
			"last_rx\n");
		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
			netdev->state, netdev->trans_start, netdev->last_rx);
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
	pr_info(" Register Name   Value\n");
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}
	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
	for (n = 0; n < adapter->num_tx_queues; n++) {
		struct igb_tx_buffer *buffer_info;
		tx_ring = adapter->tx_ring[n];
		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
			n, tx_ring->next_to_use, tx_ring->next_to_clean,
			(u64)buffer_info->dma,
			buffer_info->length,
			buffer_info->next_to_watch,
			(u64)buffer_info->time_stamp);
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
	 *   +--------------------------------------------------------------+
	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
		pr_info("------------------------------------\n");
		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		pr_info("------------------------------------\n");
		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
			"[bi->dma       ] leng  ntw timestamp        "
			"bi->skb\n");

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
			const char *next_desc;
			struct igb_tx_buffer *buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, i);
			buffer_info = &tx_ring->tx_buffer_info[i];
			u0 = (struct my_u0 *)tx_desc;
			if (i == tx_ring->next_to_use &&
			    i == tx_ring->next_to_clean)
				next_desc = " NTC/U";
			else if (i == tx_ring->next_to_use)
				next_desc = " NTU";
			else if (i == tx_ring->next_to_clean)
				next_desc = " NTC";
			else
				next_desc = "";

			pr_info("T [0x%03X]    %016llX %016llX %016llX"
				" %04X  %p %016llX %p%s\n", i,
				le64_to_cpu(u0->a),
				le64_to_cpu(u0->b),
				(u64)buffer_info->dma,
				buffer_info->length,
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
				buffer_info->skb, next_desc);

			if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
					16, 1, phys_to_virt(buffer_info->dma),
					buffer_info->length, true);
		}
	}
	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
	pr_info("Queue [NTU] [NTC]\n");
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		pr_info(" %5d %5X %5X\n",
			n, rx_ring->next_to_use, rx_ring->next_to_clean);
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS  |
	 *   | Checksum   Ident  |   |           |    | Type | Type  |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
		pr_info("------------------------------------\n");
		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		pr_info("------------------------------------\n");
		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
			"[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
			"----------- [bi->skb] <-- Adv Rx Write-Back format\n");
		for (i = 0; i < rx_ring->count; i++) {
			const char *next_desc;
			struct igb_rx_buffer *buffer_info;
			buffer_info = &rx_ring->rx_buffer_info[i];
			rx_desc = IGB_RX_DESC(rx_ring, i);
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);

			if (i == rx_ring->next_to_use)
				next_desc = " NTU";
			else if (i == rx_ring->next_to_clean)
				next_desc = " NTC";
			else
				next_desc = "";

			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
				pr_info("%s[0x%03X]  %016llX %016llX -------"
					"--------- %p%s\n", "RWB", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					buffer_info->skb, next_desc);
			} else {
				pr_info("%s[0x%03X]  %016llX %016llX %016llX"
					" %p%s\n", "R  ", i,
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
					buffer_info->skb, next_desc);

				if (netif_msg_pktdata(adapter)) {
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(buffer_info->dma),
						IGB_RX_HDR_LEN, true);
					print_hex_dump(KERN_INFO, "",
						DUMP_PREFIX_ADDRESS,
						16, 1,
						phys_to_virt(
						  buffer_info->page_dma +
						  buffer_info->page_offset),
						PAGE_SIZE/2, true);
				}
			}
		}
	}

exit:
	return;
}
/**
 * igb_get_hw_dev - return device
 * used by hardware layer to print debugging information
 **/
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
{
	struct igb_adapter *adapter = hw->back;
	return adapter->netdev;
}
/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
	int ret;
	pr_info("%s - version %s\n",
		igb_driver_string, igb_driver_version);

	pr_info("%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
	return ret;
}

module_init(igb_init_module);
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
	int i = 0, j = 0;
	u32 rbase_offset = adapter->vfs_allocated_count;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
		if (adapter->vfs_allocated_count) {
			for (; i < adapter->rss_queues; i++)
				adapter->rx_ring[i]->reg_idx = rbase_offset +
							       Q_IDX_82576(i);
		}
	case e1000_82575:
	case e1000_82580:
	case e1000_i350:
	case e1000_i210:
	case e1000_i211:
	default:
		for (; i < adapter->num_rx_queues; i++)
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
		for (; j < adapter->num_tx_queues; j++)
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
		break;
	}
}
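/* Worked example (editor's illustration, not driver logic): with VFs in use
 * on 82576, Q_IDX_82576(i) = ((i & 0x1) << 3) + (i >> 1) interleaves the
 * PF's rings into the queues the VFs left free:
 *   i:            0  1  2  3  4  5  6  7
 *   Q_IDX_82576:  0  8  1  9  2 10  3 11
 * With rbase_offset = 2 (two VFs), PF ring 0 therefore lands on queue 2 and
 * PF ring 1 on queue 10, matching the "VF n owns queues n and n+8" layout
 * described in the comment above.
 */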
static void igb_free_queues(struct igb_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		kfree(adapter->tx_ring[i]);
		adapter->tx_ring[i] = NULL;
	}
	for (i = 0; i < adapter->num_rx_queues; i++) {
		kfree(adapter->rx_ring[i]);
		adapter->rx_ring[i] = NULL;
	}
	adapter->num_rx_queues = 0;
	adapter->num_tx_queues = 0;
}
/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
	struct igb_ring *ring;
	int i;
	int orig_node = adapter->node;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->tx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
		adapter->tx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	for (i = 0; i < adapter->num_rx_queues; i++) {
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
				    adapter->node);
		if (!ring)
			ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
		if (!ring)
			goto err;
		ring->count = adapter->rx_ring_count;
		ring->queue_index = i;
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
		ring->numa_node = adapter->node;
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);

		/*
		 * On i350, i210, and i211, loopback VLAN packets
		 * have the tag byte-swapped.
		 */
		if (adapter->hw.mac.type >= e1000_i350)
			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);

		adapter->rx_ring[i] = ring;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	igb_cache_ring_register(adapter);

	return 0;

err:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_queues(adapter);

	return -ENOMEM;
}
/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset of in IVAR, should be multiple of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
			   int index, int offset)
{
	u32 ivar = array_rd32(E1000_IVAR0, index);

	/* clear any bits that are currently set */
	ivar &= ~((u32)0xFF << offset);

	/* write vector and valid bit */
	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;

	array_wr32(E1000_IVAR0, index, ivar);
}
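/* Worked example (editor's illustration): for msix_vector = 2, index = 1,
 * offset = 8, the read-modify-write above clears bits 15:8 of IVAR0[1] and
 * writes (2 | E1000_IVAR_VALID) << 8 there -- 0x8200 with E1000_IVAR_VALID
 * being the 0x80 valid bit -- leaving the other three byte-wide cause
 * slots in that register untouched.
 */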
#define IGB_N0_QUEUE -1
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;
	u32 msixbm = 0;

	if (q_vector->rx.ring)
		rx_queue = q_vector->rx.ring->reg_idx;
	if (q_vector->tx.ring)
		tx_queue = q_vector->tx.ring->reg_idx;

	switch (hw->mac.type) {
	case e1000_82575:
		/* The 82575 assigns vectors using a bitmask, which matches the
		   bitmask for the EICR/EIMS/EIMC registers.  To assign one
		   or more queues to a vector, we write the appropriate bits
		   into the MSIXBM register for that vector. */
		if (rx_queue > IGB_N0_QUEUE)
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
		if (tx_queue > IGB_N0_QUEUE)
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
		if (!adapter->msix_entries && msix_vector == 0)
			msixbm |= E1000_EIMS_OTHER;
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
		q_vector->eims_value = msixbm;
		break;
	case e1000_82576:
		/*
		 * 82576 uses a table that essentially consists of 2 columns
		 * with 8 rows.  The ordering is column-major so we use the
		 * lower 3 bits as the row index, and the 4th bit as the
		 * column offset.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue & 0x7,
				       (rx_queue & 0x8) << 1);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue & 0x7,
				       ((tx_queue & 0x8) << 1) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	case e1000_82580:
	case e1000_i350:
	case e1000_i210:
	case e1000_i211:
		/*
		 * On 82580 and newer adapters the scheme is similar to 82576
		 * however instead of ordering column-major we have things
		 * ordered row-major.  So we traverse the table by using
		 * bit 0 as the column offset, and the remaining bits as the
		 * row index.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue >> 1,
				       (rx_queue & 0x1) << 4);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue >> 1,
				       ((tx_queue & 0x1) << 4) + 8);
		q_vector->eims_value = 1 << msix_vector;
		break;
	default:
		BUG();
		break;
	}

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
}
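/* Worked example (editor's illustration) of the per-family IVAR mapping
 * above, for rx_queue = 9:
 *   82576 (column-major): index = 9 & 0x7 = 1, offset = (9 & 0x8) << 1 = 16,
 *          so queue 9's cause is programmed into byte 2 of IVAR0[1];
 *   82580+ (row-major):   index = 9 >> 1 = 4, offset = (9 & 0x1) << 4 = 16,
 *          so queue 9's cause is programmed into byte 2 of IVAR0[4].
 * Tx causes land 8 bits further left in the same register (the "+ 8").
 */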
/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
	switch (hw->mac.type) {
	case e1000_82575:
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support */
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);

		/* enable msix_other interrupt */
		array_wr32(E1000_MSIXBM(0), vector++,
			   E1000_EIMS_OTHER);
		adapter->eims_other = E1000_EIMS_OTHER;

		break;

	case e1000_82576:
	case e1000_82580:
	case e1000_i350:
	case e1000_i210:
	case e1000_i211:
		/* Turn on MSI-X capability first, or our settings
		 * won't stick.  And it will take days to debug. */
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

		wr32(E1000_IVAR_MISC, tmp);
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */

	adapter->eims_enable_mask |= adapter->eims_other;

	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
}
/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	int i, err = 0, vector = 0;

	err = request_irq(adapter->msix_entries[vector].vector,
			  igb_msix_other, 0, netdev->name, adapter);
	if (err)
		goto out;
	vector++;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

		if (q_vector->rx.ring && q_vector->tx.ring)
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else if (q_vector->tx.ring)
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
				q_vector->tx.ring->queue_index);
		else if (q_vector->rx.ring)
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
				q_vector->rx.ring->queue_index);
		else
			sprintf(q_vector->name, "%s-unused", netdev->name);

		err = request_irq(adapter->msix_entries[vector].vector,
				  igb_msix_ring, 0, q_vector->name,
				  q_vector);
		if (err)
			goto out;
		vector++;
	}

	igb_configure_msix(adapter);
	return 0;
out:
	return err;
}
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		pci_disable_msix(adapter->pdev);
		kfree(adapter->msix_entries);
		adapter->msix_entries = NULL;
	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
		pci_disable_msi(adapter->pdev);
	}
}
/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	int v_idx;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
		adapter->q_vector[v_idx] = NULL;
		if (!q_vector)
			continue;
		netif_napi_del(&q_vector->napi);
		kfree(q_vector);
	}
	adapter->num_q_vectors = 0;
}
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_set_interrupt_capability(struct igb_adapter *adapter)
{
	int err;
	int numvecs, i;

	/* Number of supported queues. */
	adapter->num_rx_queues = adapter->rss_queues;
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;

	/* start with one vector for every rx queue */
	numvecs = adapter->num_rx_queues;

	/* if tx handler is separate add 1 for every tx queue */
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
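	/* Editor's example: with rss_queues = 4, no VFs and queue pairing
	 * disabled, this requests 4 Rx + 4 Tx + 1 link-status = 9 MSI-X
	 * vectors; with IGB_FLAG_QUEUE_PAIRS set, Rx/Tx share handlers and
	 * the count is 4 + 1 = 5.
	 */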
	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
					GFP_KERNEL);

	if (!adapter->msix_entries)
		goto msi_only;

	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

	err = pci_enable_msix(adapter->pdev,
			      adapter->msix_entries,
			      numvecs);
	if (err == 0)
		goto out;

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
	adapter->vfs_allocated_count = 0;
	adapter->rss_queues = 1;
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
	adapter->num_rx_queues = 1;
	adapter->num_tx_queues = 1;
	adapter->num_q_vectors = 1;
	if (!pci_enable_msi(adapter->pdev))
		adapter->flags |= IGB_FLAG_HAS_MSI;
out:
	/* Notify the stack of the (possibly) reduced queue counts. */
	rtnl_lock();
	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
	err = netif_set_real_num_rx_queues(adapter->netdev,
					   adapter->num_rx_queues);
	rtnl_unlock();
	return err;
}
/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
	struct igb_q_vector *q_vector;
	struct e1000_hw *hw = &adapter->hw;
	int v_idx;
	int orig_node = adapter->node;

	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
		if ((adapter->num_q_vectors == (adapter->num_rx_queues +
						adapter->num_tx_queues)) &&
		    (adapter->num_rx_queues == v_idx))
			adapter->node = orig_node;
		if (orig_node == -1) {
			int cur_node = next_online_node(adapter->node);
			if (cur_node == MAX_NUMNODES)
				cur_node = first_online_node;
			adapter->node = cur_node;
		}
		q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
					adapter->node);
		if (!q_vector)
			q_vector = kzalloc(sizeof(struct igb_q_vector),
					   GFP_KERNEL);
		if (!q_vector)
			goto err_out;
		q_vector->adapter = adapter;
		q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
		q_vector->itr_val = IGB_START_ITR;
		netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
		adapter->q_vector[v_idx] = q_vector;
	}
	/* Restore the adapter's original node */
	adapter->node = orig_node;

	return 0;

err_out:
	/* Restore the adapter's original node */
	adapter->node = orig_node;
	igb_free_q_vectors(adapter);
	return -ENOMEM;
}
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->rx.ring = adapter->rx_ring[ring_idx];
	q_vector->rx.ring->q_vector = q_vector;
	q_vector->rx.count++;
	q_vector->itr_val = adapter->rx_itr_setting;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}

static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
				      int ring_idx, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

	q_vector->tx.ring = adapter->tx_ring[ring_idx];
	q_vector->tx.ring->q_vector = q_vector;
	q_vector->tx.count++;
	q_vector->itr_val = adapter->tx_itr_setting;
	q_vector->tx.work_limit = adapter->tx_work_limit;
	if (q_vector->itr_val && q_vector->itr_val <= 3)
		q_vector->itr_val = IGB_START_ITR;
}
/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
	int i;
	int v_idx = 0;

	if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
	    (adapter->num_q_vectors < adapter->num_tx_queues))
		return -ENOMEM;

	if (adapter->num_q_vectors >=
	    (adapter->num_rx_queues + adapter->num_tx_queues)) {
		for (i = 0; i < adapter->num_rx_queues; i++)
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		for (i = 0; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	} else {
		for (i = 0; i < adapter->num_rx_queues; i++) {
			if (i < adapter->num_tx_queues)
				igb_map_tx_ring_to_vector(adapter, i, v_idx);
			igb_map_rx_ring_to_vector(adapter, i, v_idx++);
		}
		for (; i < adapter->num_tx_queues; i++)
			igb_map_tx_ring_to_vector(adapter, i, v_idx++);
	}
	return 0;
}
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	err = igb_set_interrupt_capability(adapter);
	if (err)
		return err;

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}

	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	}

	igb_assign_vector(adapter->q_vector[0], 0);

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
static void igb_free_irq(struct igb_adapter *adapter)
{
	if (adapter->msix_entries) {
		int vector = 0, i;

		free_irq(adapter->msix_entries[vector++].vector, adapter);

		for (i = 0; i < adapter->num_q_vectors; i++)
			free_irq(adapter->msix_entries[vector++].vector,
				 adapter->q_vector[i]);
	} else {
		free_irq(adapter->pdev->irq, adapter);
	}
}
/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	/*
	 * we need to be careful when disabling interrupts.  The VFs are also
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
	if (adapter->msix_entries) {
		u32 regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
	}

	wr32(E1000_IAM, 0);
	wr32(E1000_IMC, ~0);
	wrfl();
	if (adapter->msix_entries) {
		int i;
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
}
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
		u32 regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		wr32(E1000_IMS, ims);
	} else {
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !test_bit(old_vid, adapter->active_vlans)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* Let firmware know the driver has taken over */
	ctrl_ext = rd32(E1000_CTRL_EXT);
	wr32(E1000_CTRL_EXT,
	     ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
	}
}
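/* Editor's note: keeping one descriptor unused is what disambiguates a full
 * ring from an empty one. E.g. with ring->count = 256, at most 255
 * descriptors are ever handed to hardware, so next_to_use == next_to_clean
 * can only mean "ring empty".
 */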
/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
	igb_reset_phy(&adapter->hw);

	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 **/
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));

	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_disable(&(adapter->q_vector[i]->napi));

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	netif_carrier_off(netdev);

	/* record the stats before reset */
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);
	igb_down(adapter);
	igb_up(adapter);
	clear_bit(__IGB_RESETTING, &adapter->state);
}
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_i350:
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	case e1000_i210:
	case e1000_i211:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}
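	/* Worked example (editor's illustration) of the sizing above for a
	 * 9018-byte jumbo frame: min_tx_space = ALIGN((9018 + 16 - 4) * 2,
	 * 1024) >> 10 = 18 KB (16 bytes of per-packet Tx context, FCS
	 * excluded), and min_rx_space = ALIGN(9018, 1024) >> 10 = 9 KB; any
	 * Tx shortfall is carved out of the Rx allocation unless Rx would
	 * drop below that 9 KB minimum.
	 */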
	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;
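	/* Worked example (editor's illustration): with pba = 34 KB and
	 * max_frame_size = 1522, hwm = min(34816 * 9 / 10, 34816 - 2 * 1522)
	 * = min(31334, 31772) = 31334; masking to 16-byte granularity gives
	 * high_water = 0x7A60 (31328) and low_water = 31312.
	 */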
	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");

	/*
	 * Flow control settings reset on hardware reset, so guarantee flow
	 * control is off when forcing speed.
	 */
	if (!hw->mac.autoneg)
		igb_force_mac_fc(hw);

	igb_init_dmac(adapter, pba);
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
static netdev_features_t igb_fix_features(struct net_device *netdev,
	netdev_features_t features)
{
	/*
	 * Since there is no support for separate rx/tx vlan accel
	 * enable/disable make sure tx flag is always in same state as rx.
	 */
	if (features & NETIF_F_HW_VLAN_RX)
		features |= NETIF_F_HW_VLAN_TX;
	else
		features &= ~NETIF_F_HW_VLAN_TX;

	return features;
}

static int igb_set_features(struct net_device *netdev,
	netdev_features_t features)
{
	netdev_features_t changed = netdev->features ^ features;
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (changed & NETIF_F_HW_VLAN_RX)
		igb_vlan_mode(netdev, features);

	if (!(changed & NETIF_F_RXALL))
		return 0;

	netdev->features = features;

	if (netif_running(netdev))
		igb_reinit_locked(adapter);
	else
		igb_reset(adapter);

	return 0;
}
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open		= igb_open,
	.ndo_stop		= igb_close,
	.ndo_start_xmit		= igb_xmit_frame,
	.ndo_get_stats64	= igb_get_stats64,
	.ndo_set_rx_mode	= igb_set_rx_mode,
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
};
/**
 * igb_set_fw_version - Configure version string for ethtool
 * @adapter: adapter struct
 *
 **/
void igb_set_fw_version(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 eeprom_verh, eeprom_verl, comb_verh, comb_verl, comb_offset;
	u16 major, build, patch, fw_version;
	u32 etrack_id;

	hw->nvm.ops.read(hw, 5, 1, &fw_version);
	if (adapter->hw.mac.type != e1000_i211) {
		hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verh);
		hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verl);
		etrack_id = (eeprom_verh << IGB_ETRACK_SHIFT) | eeprom_verl;

		/* combo image version needs to be found */
		hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
		if ((comb_offset != 0x0) &&
		    (comb_offset != IGB_NVM_VER_INVALID)) {
			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
					 + 1), 1, &comb_verh);
			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset),
					 1, &comb_verl);

			/* Only display Option Rom if it exists and is valid */
			if ((comb_verh && comb_verl) &&
			    ((comb_verh != IGB_NVM_VER_INVALID) &&
			     (comb_verl != IGB_NVM_VER_INVALID))) {
				major = comb_verl >> IGB_COMB_VER_SHFT;
				build = (comb_verl << IGB_COMB_VER_SHFT) |
					(comb_verh >> IGB_COMB_VER_SHFT);
				patch = comb_verh & IGB_COMB_VER_MASK;
				snprintf(adapter->fw_version,
					 sizeof(adapter->fw_version),
					 "%d.%d%d, 0x%08x, %d.%d.%d",
					 (fw_version & IGB_MAJOR_MASK) >>
					 IGB_MAJOR_SHIFT,
					 (fw_version & IGB_MINOR_MASK) >>
					 IGB_MINOR_SHIFT,
					 (fw_version & IGB_BUILD_MASK),
					 etrack_id, major, build, patch);
				goto out;
			}
		}
		snprintf(adapter->fw_version, sizeof(adapter->fw_version),
			 "%d.%d%d, 0x%08x",
			 (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
			 (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
			 (fw_version & IGB_BUILD_MASK), etrack_id);
	} else {
		snprintf(adapter->fw_version, sizeof(adapter->fw_version),
			 "%d.%d%d",
			 (fw_version & IGB_MAJOR_MASK) >> IGB_MAJOR_SHIFT,
			 (fw_version & IGB_MINOR_MASK) >> IGB_MINOR_SHIFT,
			 (fw_version & IGB_BUILD_MASK));
	}
out:
	return;
}
/**
 * igb_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igb_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igb_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring of the adapter private structure,
 * and a hardware reset occur.
 **/
static int __devinit igb_probe(struct pci_dev *pdev,
			       const struct pci_device_id *ent)
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
	u16 eeprom_data = 0;
	s32 ret_val;
	static int global_quad_port_a; /* global quad port a indication */
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
	unsigned long mmio_start, mmio_len;
	int err, pci_using_dac;
	u16 eeprom_apme_mask = IGB_EEPROM_APME;
	u8 part_str[E1000_PBANUM_LENGTH];

	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
			pci_name(pdev), pdev->vendor, pdev->device);
		return -EINVAL;
	}

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	pci_using_dac = 0;
	err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
	if (!err) {
		err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
		if (!err)
			pci_using_dac = 1;
	} else {
		err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
			if (err) {
				dev_err(&pdev->dev, "No usable DMA "
					"configuration, aborting\n");
				goto err_dma;
			}
		}
	}
	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
					   IORESOURCE_MEM),
					   igb_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	pci_set_master(pdev);
	pci_save_state(pdev);

	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
				   IGB_MAX_TX_QUEUES);
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);

	mmio_start = pci_resource_start(pdev, 0);
	mmio_len = pci_resource_len(pdev, 0);

	err = -EIO;
	hw->hw_addr = ioremap(mmio_start, mmio_len);
	if (!hw->hw_addr)
		goto err_ioremap;
	netdev->netdev_ops = &igb_netdev_ops;
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

	netdev->mem_start = mmio_start;
	netdev->mem_end = mmio_start + mmio_len;

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* setup the private structure */
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);

	hw->phy.autoneg_wait_to_complete = false;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");

	/*
	 * features is initialized to 0 in allocation, it might have bits
	 * set by igb_sw_init so we should use an or instead of an
	 * assignment.
	 */
	netdev->features |= NETIF_F_SG |
			    NETIF_F_IP_CSUM |
			    NETIF_F_IPV6_CSUM |
			    NETIF_F_TSO |
			    NETIF_F_TSO6 |
			    NETIF_F_RXHASH |
			    NETIF_F_RXCSUM |
			    NETIF_F_HW_VLAN_RX |
			    NETIF_F_HW_VLAN_TX;

	/* copy netdev features into list of user selectable features */
	netdev->hw_features |= netdev->features;
	netdev->hw_features |= NETIF_F_RXALL;

	/* set this bit last since it cannot be part of hw_features */
	netdev->features |= NETIF_F_HW_VLAN_FILTER;

	netdev->vlan_features |= NETIF_F_TSO |
				 NETIF_F_TSO6 |
				 NETIF_F_IP_CSUM |
				 NETIF_F_IPV6_CSUM |
				 NETIF_F_SG;

	netdev->priv_flags |= IFF_SUPP_NOFCS;

	if (pci_using_dac) {
		netdev->features |= NETIF_F_HIGHDMA;
		netdev->vlan_features |= NETIF_F_HIGHDMA;
	}

	if (hw->mac.type >= e1000_82576) {
		netdev->hw_features |= NETIF_F_SCTP_CSUM;
		netdev->features |= NETIF_F_SCTP_CSUM;
	}

	netdev->priv_flags |= IFF_UNICAST_FLT;
2061 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2063 /* before reading the NVM, reset the controller to put the device in a
2064 * known good starting state */
2065 hw->mac.ops.reset_hw(hw);
2067 /*
2068 * make sure the NVM is good; i211 parts have special NVM that
2069 * doesn't contain a checksum
2070 */
2071 if (hw->mac.type != e1000_i211) {
2072 if (hw->nvm.ops.validate(hw) < 0) {
2073 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2074 err = -EIO;
2075 goto err_eeprom;
2076 }
2077 }
2079 /* copy the MAC address out of the NVM */
2080 if (hw->mac.ops.read_mac_addr(hw))
2081 dev_err(&pdev->dev, "NVM Read Error\n");
2083 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2084 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2086 if (!is_valid_ether_addr(netdev->perm_addr)) {
2087 dev_err(&pdev->dev, "Invalid MAC Address\n");
2092 /* get firmware version for ethtool -i */
2093 igb_set_fw_version(adapter);
2095 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2096 (unsigned long) adapter);
2097 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2098 (unsigned long) adapter);
2100 INIT_WORK(&adapter->reset_task, igb_reset_task);
2101 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2103 /* Initialize link properties that are user-changeable */
2104 adapter->fc_autoneg = true;
2105 hw->mac.autoneg = true;
2106 hw->phy.autoneg_advertised = 0x2f;
2108 hw->fc.requested_mode = e1000_fc_default;
2109 hw->fc.current_mode = e1000_fc_default;
2111 igb_validate_mdi_setting(hw);
2113 /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
2114 * enable the ACPI Magic Packet filter
2115 */
2117 if (hw->bus.func == 0)
2118 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2119 else if (hw->mac.type >= e1000_82580)
2120 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2121 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2122 &eeprom_data);
2123 else if (hw->bus.func == 1)
2124 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2126 if (eeprom_data & eeprom_apme_mask)
2127 adapter->eeprom_wol |= E1000_WUFC_MAG;
2129 /* now that we have the eeprom settings, apply the special cases where
2130 * the eeprom may be wrong or the board simply won't support wake on
2131 * lan on a particular port */
2132 switch (pdev->device) {
2133 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2134 adapter->eeprom_wol = 0;
2135 break;
2136 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2137 case E1000_DEV_ID_82576_FIBER:
2138 case E1000_DEV_ID_82576_SERDES:
2139 /* Wake events only supported on port A for dual fiber
2140 * regardless of eeprom setting */
2141 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2142 adapter->eeprom_wol = 0;
2143 break;
2144 case E1000_DEV_ID_82576_QUAD_COPPER:
2145 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2146 /* if quad port adapter, disable WoL on all but port A */
2147 if (global_quad_port_a != 0)
2148 adapter->eeprom_wol = 0;
2149 else
2150 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2151 /* Reset for multiple quad port adapters */
2152 if (++global_quad_port_a == 4)
2153 global_quad_port_a = 0;
2154 break;
2155 }
2157 /* initialize the wol settings based on the eeprom settings */
2158 adapter->wol = adapter->eeprom_wol;
2159 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
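/* adapter->wol simply mirrors the EEPROM-derived value at probe time; it
 * can be overridden later from userspace (ethtool -s <iface> wol g), and
 * the PM core is told here whether this port may wake the system. */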
2161 /* reset the hardware with the new settings */
2162 igb_reset(adapter);
2164 /* let the f/w know that the h/w is now under the control of the
2165 * driver. */
2166 igb_get_hw_control(adapter);
2168 strcpy(netdev->name, "eth%d");
2169 err = register_netdev(netdev);
2170 if (err)
2171 goto err_register;
2173 /* carrier off reporting is important to ethtool even BEFORE open */
2174 netif_carrier_off(netdev);
2176 #ifdef CONFIG_IGB_DCA
2177 if (dca_add_requester(&pdev->dev) == 0) {
2178 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2179 dev_info(&pdev->dev, "DCA enabled\n");
2180 igb_setup_dca(adapter);
2181 }
2183 #endif
2184 #ifdef CONFIG_IGB_PTP
2185 /* do hw tstamp init after resetting */
2186 igb_ptp_init(adapter);
2187 #endif
2189 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2190 /* print bus type/speed/width info */
2191 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2193 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2194 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2196 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2197 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2198 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2202 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2204 strcpy(part_str, "Unknown");
2205 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2206 dev_info(&pdev->dev,
2207 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2208 adapter->msix_entries ? "MSI-X" :
2209 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2210 adapter->num_rx_queues, adapter->num_tx_queues);
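/* The interrupt mode reported here was settled earlier in
 * igb_init_interrupt_scheme(): MSI-X with one vector per q_vector when
 * available, falling back to MSI and finally to legacy INTx. */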
2211 switch (hw->mac.type) {
2212 case e1000_i350:
2213 case e1000_i210:
2214 case e1000_i211:
2215 igb_set_eee_i350(hw);
2216 break;
2217 default:
2218 break;
2219 }
2221 pm_runtime_put_noidle(&pdev->dev);
2222 return 0;
2224 err_register:
2225 igb_release_hw_control(adapter);
2226 err_eeprom:
2227 if (!igb_check_reset_block(hw))
2228 igb_reset_phy(hw);
2230 if (hw->flash_address)
2231 iounmap(hw->flash_address);
2232 err_sw_init:
2233 igb_clear_interrupt_scheme(adapter);
2234 iounmap(hw->hw_addr);
2235 err_ioremap:
2236 free_netdev(netdev);
2237 err_alloc_etherdev:
2238 pci_release_selected_regions(pdev,
2239 pci_select_bars(pdev, IORESOURCE_MEM));
2240 err_pci_reg:
2241 err_dma:
2242 pci_disable_device(pdev);
2243 return err;
2244 }
2246 /**
2247 * igb_remove - Device Removal Routine
2248 * @pdev: PCI device information struct
2249 *
2250 * igb_remove is called by the PCI subsystem to alert the driver
2251 * that it should release a PCI device. This could be caused by a
2252 * Hot-Plug event, or because the driver is going to be removed from
2253 * memory.
2254 **/
2255 static void __devexit igb_remove(struct pci_dev *pdev)
2256 {
2257 struct net_device *netdev = pci_get_drvdata(pdev);
2258 struct igb_adapter *adapter = netdev_priv(netdev);
2259 struct e1000_hw *hw = &adapter->hw;
2261 pm_runtime_get_noresume(&pdev->dev);
2262 #ifdef CONFIG_IGB_PTP
2263 igb_ptp_remove(adapter);
2264 #endif
2266 /*
2267 * The watchdog timer may be rescheduled, so explicitly
2268 * disable watchdog from being rescheduled.
2269 */
2270 set_bit(__IGB_DOWN, &adapter->state);
2271 del_timer_sync(&adapter->watchdog_timer);
2272 del_timer_sync(&adapter->phy_info_timer);
2274 cancel_work_sync(&adapter->reset_task);
2275 cancel_work_sync(&adapter->watchdog_task);
2277 #ifdef CONFIG_IGB_DCA
2278 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2279 dev_info(&pdev->dev, "DCA disabled\n");
2280 dca_remove_requester(&pdev->dev);
2281 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2282 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2283 }
2284 #endif
2286 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2287 * would have already happened in close and is redundant. */
2288 igb_release_hw_control(adapter);
2290 unregister_netdev(netdev);
2292 igb_clear_interrupt_scheme(adapter);
2294 #ifdef CONFIG_PCI_IOV
2295 /* reclaim resources allocated to VFs */
2296 if (adapter->vf_data) {
2297 /* disable iov and allow time for transactions to clear */
2298 if (!igb_check_vf_assignment(adapter)) {
2299 pci_disable_sriov(pdev);
2300 msleep(500);
2301 } else {
2302 dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2303 }
2305 kfree(adapter->vf_data);
2306 adapter->vf_data = NULL;
2307 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2308 wrfl();
2309 msleep(100);
2310 dev_info(&pdev->dev, "IOV Disabled\n");
2311 }
2312 #endif
2314 iounmap(hw->hw_addr);
2315 if (hw->flash_address)
2316 iounmap(hw->flash_address);
2317 pci_release_selected_regions(pdev,
2318 pci_select_bars(pdev, IORESOURCE_MEM));
2320 kfree(adapter->shadow_vfta);
2321 free_netdev(netdev);
2323 pci_disable_pcie_error_reporting(pdev);
2325 pci_disable_device(pdev);
2326 }
2328 /**
2329 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2330 * @adapter: board private structure to initialize
2331 *
2332 * This function initializes the vf specific data storage and then attempts to
2333 * allocate the VFs. The reason for ordering it this way is because it is much
2334 * more expensive time wise to disable SR-IOV than it is to allocate and free
2335 * the memory for the VFs.
2336 **/
2337 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
2338 {
2339 #ifdef CONFIG_PCI_IOV
2340 struct pci_dev *pdev = adapter->pdev;
2341 struct e1000_hw *hw = &adapter->hw;
2342 int old_vfs = igb_find_enabled_vfs(adapter);
2343 int i;
2345 /* Virtualization features not supported on i210 family. */
2346 if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
2347 return;
2349 if (old_vfs) {
2350 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2351 "max_vfs setting of %d\n", old_vfs, max_vfs);
2352 adapter->vfs_allocated_count = old_vfs;
2353 }
2355 if (!adapter->vfs_allocated_count)
2356 goto out;
2358 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2359 sizeof(struct vf_data_storage), GFP_KERNEL);
2361 /* if allocation failed then we do not support SR-IOV */
2362 if (!adapter->vf_data) {
2363 adapter->vfs_allocated_count = 0;
2364 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2370 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2373 dev_info(&pdev->dev, "%d VFs allocated\n",
2374 adapter->vfs_allocated_count);
2375 for (i = 0; i < adapter->vfs_allocated_count; i++)
2376 igb_vf_configure(adapter, i);
2378 /* DMA Coalescing is not supported in IOV mode. */
2379 adapter->flags &= ~IGB_FLAG_DMAC;
2380 goto out;
2381 err_out:
2382 kfree(adapter->vf_data);
2383 adapter->vf_data = NULL;
2384 adapter->vfs_allocated_count = 0;
2385 out:
2386 return;
2387 #endif /* CONFIG_PCI_IOV */
2388 }
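/* A sketch of how this path is normally exercised, assuming the max_vfs
 * module parameter declared elsewhere in this file:
 *
 *   modprobe igb max_vfs=7
 *
 * vfs_allocated_count is then 7 per port, vf_data is allocated above and
 * pci_enable_sriov() exposes the virtual functions to the PCI core. */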
2390 /**
2391 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2392 * @adapter: board private structure to initialize
2394 * igb_sw_init initializes the Adapter private data structure.
2395 * Fields are initialized based on PCI device information and
2396 * OS network device settings (MTU size).
2397 **/
2398 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2399 {
2400 struct e1000_hw *hw = &adapter->hw;
2401 struct net_device *netdev = adapter->netdev;
2402 struct pci_dev *pdev = adapter->pdev;
2403 u32 max_rss_queues;
2405 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2407 /* set default ring sizes */
2408 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2409 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2411 /* set default ITR values */
2412 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2413 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2415 /* set default work limits */
2416 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2418 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2419 VLAN_HLEN;
2420 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2424 spin_lock_init(&adapter->stats64_lock);
2425 #ifdef CONFIG_PCI_IOV
2426 switch (hw->mac.type) {
2427 case e1000_82576:
2428 case e1000_i350:
2429 if (max_vfs > 7) {
2430 dev_warn(&pdev->dev,
2431 "Maximum of 7 VFs per PF, using max\n");
2432 adapter->vfs_allocated_count = 7;
2433 } else
2434 adapter->vfs_allocated_count = max_vfs;
2435 break;
2436 default:
2437 break;
2438 }
2439 #endif /* CONFIG_PCI_IOV */
2441 /* Determine the maximum number of RSS queues supported. */
2442 switch (hw->mac.type) {
2443 case e1000_i211:
2444 max_rss_queues = IGB_MAX_RX_QUEUES_I211;
2445 break;
2446 case e1000_82575:
2447 case e1000_i210:
2448 max_rss_queues = IGB_MAX_RX_QUEUES_82575;
2449 break;
2450 case e1000_i350:
2451 /* I350 cannot do RSS and SR-IOV at the same time */
2452 if (!!adapter->vfs_allocated_count) {
2453 max_rss_queues = 1;
2454 break;
2455 }
2456 /* fall through */
2457 case e1000_82576:
2458 if (!!adapter->vfs_allocated_count) {
2459 max_rss_queues = 2;
2460 break;
2461 }
2462 /* fall through */
2463 case e1000_82580:
2464 default:
2465 max_rss_queues = IGB_MAX_RX_QUEUES;
2466 break;
2467 }
2469 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
2471 /* Determine if we need to pair queues. */
2472 switch (hw->mac.type) {
2473 case e1000_82575:
2474 case e1000_i211:
2475 /* Device supports enough interrupts without queue pairing. */
2476 break;
2477 case e1000_82576:
2478 /*
2479 * If VFs are going to be allocated with RSS queues then we
2480 * should pair the queues in order to conserve interrupts due
2481 * to limited supply.
2482 */
2483 if ((adapter->rss_queues > 1) &&
2484 (adapter->vfs_allocated_count > 6))
2485 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2486 /* fall through */
2487 case e1000_82580:
2488 case e1000_i350:
2489 case e1000_i210:
2490 default:
2491 /*
2492 * If rss_queues > half of max_rss_queues, pair the queues in
2493 * order to conserve interrupts due to limited supply.
2494 */
2495 if (adapter->rss_queues > (max_rss_queues / 2))
2496 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2497 break;
2498 }
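/* Worked example, assuming IGB_MAX_RX_QUEUES is 8: an 82580 on an 8-CPU
 * system gets rss_queues = 8, which exceeds 8 / 2, so Tx and Rx rings are
 * paired onto shared vectors via IGB_FLAG_QUEUE_PAIRS. */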
2500 /* Setup and initialize a copy of the hw vlan table array */
2501 adapter->shadow_vfta = kzalloc(sizeof(u32) *
2502 E1000_VLAN_FILTER_TBL_SIZE,
2503 GFP_ATOMIC);
2505 /* This call may decrease the number of queues */
2506 if (igb_init_interrupt_scheme(adapter)) {
2507 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2511 igb_probe_vfs(adapter);
2513 /* Explicitly disable IRQ since the NIC can be in any state. */
2514 igb_irq_disable(adapter);
2516 if (hw->mac.type >= e1000_i350)
2517 adapter->flags &= ~IGB_FLAG_DMAC;
2519 set_bit(__IGB_DOWN, &adapter->state);
2520 return 0;
2521 }
2523 /**
2524 * igb_open - Called when a network interface is made active
2525 * @netdev: network interface device structure
2527 * Returns 0 on success, negative value on failure
2529 * The open entry point is called when a network interface is made
2530 * active by the system (IFF_UP). At this point all resources needed
2531 * for transmit and receive operations are allocated, the interrupt
2532 * handler is registered with the OS, the watchdog timer is started,
2533 * and the stack is notified that the interface is ready.
2534 **/
2535 static int __igb_open(struct net_device *netdev, bool resuming)
2536 {
2537 struct igb_adapter *adapter = netdev_priv(netdev);
2538 struct e1000_hw *hw = &adapter->hw;
2539 struct pci_dev *pdev = adapter->pdev;
2540 int err;
2541 int i;
2543 /* disallow open during test */
2544 if (test_bit(__IGB_TESTING, &adapter->state)) {
2545 WARN_ON(resuming);
2546 return -EBUSY;
2547 }
2549 if (!resuming)
2550 pm_runtime_get_sync(&pdev->dev);
2552 netif_carrier_off(netdev);
2554 /* allocate transmit descriptors */
2555 err = igb_setup_all_tx_resources(adapter);
2556 if (err)
2557 goto err_setup_tx;
2559 /* allocate receive descriptors */
2560 err = igb_setup_all_rx_resources(adapter);
2561 if (err)
2562 goto err_setup_rx;
2564 igb_power_up_link(adapter);
2566 /* before we allocate an interrupt, we must be ready to handle it.
2567 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2568 * as soon as we call pci_request_irq, so we have to setup our
2569 * clean_rx handler before we do so. */
2570 igb_configure(adapter);
2572 err = igb_request_irq(adapter);
2573 if (err)
2574 goto err_req_irq;
2576 /* From here on the code is the same as igb_up() */
2577 clear_bit(__IGB_DOWN, &adapter->state);
2579 for (i = 0; i < adapter->num_q_vectors; i++)
2580 napi_enable(&(adapter->q_vector[i]->napi));
2582 /* Clear any pending interrupts. */
2583 rd32(E1000_ICR);
2585 igb_irq_enable(adapter);
2587 /* notify VFs that reset has been completed */
2588 if (adapter->vfs_allocated_count) {
2589 u32 reg_data = rd32(E1000_CTRL_EXT);
2590 reg_data |= E1000_CTRL_EXT_PFRSTD;
2591 wr32(E1000_CTRL_EXT, reg_data);
2592 }
2594 netif_tx_start_all_queues(netdev);
2596 if (!resuming)
2597 pm_runtime_put(&pdev->dev);
2599 /* start the watchdog. */
2600 hw->mac.get_link_status = 1;
2601 schedule_work(&adapter->watchdog_task);
2603 return 0;
2605 err_req_irq:
2606 igb_release_hw_control(adapter);
2607 igb_power_down_link(adapter);
2608 igb_free_all_rx_resources(adapter);
2609 err_setup_rx:
2610 igb_free_all_tx_resources(adapter);
2611 err_setup_tx:
2612 igb_reset(adapter);
2613 if (!resuming)
2614 pm_runtime_put(&pdev->dev);
2616 return err;
2617 }
2619 static int igb_open(struct net_device *netdev)
2620 {
2621 return __igb_open(netdev, false);
2622 }
2624 /**
2625 * igb_close - Disables a network interface
2626 * @netdev: network interface device structure
2628 * Returns 0, this is not allowed to fail
2630 * The close entry point is called when an interface is de-activated
2631 * by the OS. The hardware is still under the driver's control, but
2632 * needs to be disabled. A global MAC reset is issued to stop the
2633 * hardware, and all transmit and receive resources are freed.
2634 **/
2635 static int __igb_close(struct net_device *netdev, bool suspending)
2636 {
2637 struct igb_adapter *adapter = netdev_priv(netdev);
2638 struct pci_dev *pdev = adapter->pdev;
2640 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2642 if (!suspending)
2643 pm_runtime_get_sync(&pdev->dev);
2645 igb_down(adapter);
2646 igb_free_irq(adapter);
2648 igb_free_all_tx_resources(adapter);
2649 igb_free_all_rx_resources(adapter);
2651 if (!suspending)
2652 pm_runtime_put_sync(&pdev->dev);
2653 return 0;
2654 }
2656 static int igb_close(struct net_device *netdev)
2657 {
2658 return __igb_close(netdev, false);
2659 }
2661 /**
2662 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2663 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2665 * Return 0 on success, negative on failure
2666 **/
2667 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2668 {
2669 struct device *dev = tx_ring->dev;
2670 int orig_node = dev_to_node(dev);
2671 int size;
2673 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2674 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2675 if (!tx_ring->tx_buffer_info)
2676 tx_ring->tx_buffer_info = vzalloc(size);
2677 if (!tx_ring->tx_buffer_info)
2678 goto err;
2680 /* round up to nearest 4K */
2681 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2682 tx_ring->size = ALIGN(tx_ring->size, 4096);
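/* e.g. with the default of 256 descriptors, 256 * 16 bytes per
 * union e1000_adv_tx_desc is exactly 4096, so the ALIGN() above is a
 * no-op; other counts round up to the next 4K page. */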
2684 set_dev_node(dev, tx_ring->numa_node);
2685 tx_ring->desc = dma_alloc_coherent(dev,
2686 tx_ring->size,
2687 &tx_ring->dma,
2688 GFP_KERNEL);
2689 set_dev_node(dev, orig_node);
2690 if (!tx_ring->desc)
2691 tx_ring->desc = dma_alloc_coherent(dev,
2692 tx_ring->size,
2693 &tx_ring->dma,
2694 GFP_KERNEL);
2696 if (!tx_ring->desc)
2697 goto err;
2699 tx_ring->next_to_use = 0;
2700 tx_ring->next_to_clean = 0;
2701 return 0;
2703 err:
2705 vfree(tx_ring->tx_buffer_info);
2706 dev_err(dev,
2707 "Unable to allocate memory for the transmit descriptor ring\n");
2708 return -ENOMEM;
2709 }
2711 /**
2712 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2713 * (Descriptors) for all queues
2714 * @adapter: board private structure
2716 * Return 0 on success, negative on failure
2717 **/
2718 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2719 {
2720 struct pci_dev *pdev = adapter->pdev;
2721 int i, err = 0;
2723 for (i = 0; i < adapter->num_tx_queues; i++) {
2724 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2725 if (err) {
2726 dev_err(&pdev->dev,
2727 "Allocation for Tx Queue %u failed\n", i);
2728 for (i--; i >= 0; i--)
2729 igb_free_tx_resources(adapter->tx_ring[i]);
2730 break;
2731 }
2732 }
2734 return err;
2735 }
2737 /**
2738 * igb_setup_tctl - configure the transmit control registers
2739 * @adapter: Board private structure
2740 **/
2741 void igb_setup_tctl(struct igb_adapter *adapter)
2742 {
2743 struct e1000_hw *hw = &adapter->hw;
2744 u32 tctl;
2746 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2747 wr32(E1000_TXDCTL(0), 0);
2749 /* Program the Transmit Control Register */
2750 tctl = rd32(E1000_TCTL);
2751 tctl &= ~E1000_TCTL_CT;
2752 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2753 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2755 igb_config_collision_dist(hw);
2757 /* Enable transmits */
2758 tctl |= E1000_TCTL_EN;
2760 wr32(E1000_TCTL, tctl);
2761 }
2763 /**
2764 * igb_configure_tx_ring - Configure transmit ring after Reset
2765 * @adapter: board private structure
2766 * @ring: tx ring to configure
2768 * Configure a transmit ring after a reset.
2769 **/
2770 void igb_configure_tx_ring(struct igb_adapter *adapter,
2771 struct igb_ring *ring)
2772 {
2773 struct e1000_hw *hw = &adapter->hw;
2774 u32 txdctl = 0;
2775 u64 tdba = ring->dma;
2776 int reg_idx = ring->reg_idx;
2778 /* disable the queue */
2779 wr32(E1000_TXDCTL(reg_idx), 0);
2780 wrfl();
2781 mdelay(10);
2783 wr32(E1000_TDLEN(reg_idx),
2784 ring->count * sizeof(union e1000_adv_tx_desc));
2785 wr32(E1000_TDBAL(reg_idx),
2786 tdba & 0x00000000ffffffffULL);
2787 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2789 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2790 wr32(E1000_TDH(reg_idx), 0);
2791 writel(0, ring->tail);
2793 txdctl |= IGB_TX_PTHRESH;
2794 txdctl |= IGB_TX_HTHRESH << 8;
2795 txdctl |= IGB_TX_WTHRESH << 16;
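/* PTHRESH, HTHRESH and WTHRESH occupy the TXDCTL fields at bit offsets
 * 0, 8 and 16; they tune descriptor prefetch and writeback batching
 * before the queue-enable bit is set below. */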
2797 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2798 wr32(E1000_TXDCTL(reg_idx), txdctl);
2799 }
2801 /**
2802 * igb_configure_tx - Configure transmit Unit after Reset
2803 * @adapter: board private structure
2805 * Configure the Tx unit of the MAC after a reset.
2807 static void igb_configure_tx(struct igb_adapter *adapter)
2811 for (i = 0; i < adapter->num_tx_queues; i++)
2812 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2813 }
2815 /**
2816 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2817 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2819 * Returns 0 on success, negative on failure
2820 **/
2821 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2822 {
2823 struct device *dev = rx_ring->dev;
2824 int orig_node = dev_to_node(dev);
2825 int size, desc_len;
2827 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2828 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2829 if (!rx_ring->rx_buffer_info)
2830 rx_ring->rx_buffer_info = vzalloc(size);
2831 if (!rx_ring->rx_buffer_info)
2832 goto err;
2834 desc_len = sizeof(union e1000_adv_rx_desc);
2836 /* Round up to nearest 4K */
2837 rx_ring->size = rx_ring->count * desc_len;
2838 rx_ring->size = ALIGN(rx_ring->size, 4096);
2840 set_dev_node(dev, rx_ring->numa_node);
2841 rx_ring->desc = dma_alloc_coherent(dev,
2842 rx_ring->size,
2843 &rx_ring->dma,
2844 GFP_KERNEL);
2845 set_dev_node(dev, orig_node);
2846 if (!rx_ring->desc)
2847 rx_ring->desc = dma_alloc_coherent(dev,
2848 rx_ring->size,
2849 &rx_ring->dma,
2850 GFP_KERNEL);
2852 if (!rx_ring->desc)
2853 goto err;
2855 rx_ring->next_to_clean = 0;
2856 rx_ring->next_to_use = 0;
2858 return 0;
2860 err:
2861 vfree(rx_ring->rx_buffer_info);
2862 rx_ring->rx_buffer_info = NULL;
2863 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2864 " ring\n");
2865 return -ENOMEM;
2866 }
2868 /**
2869 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2870 * (Descriptors) for all queues
2871 * @adapter: board private structure
2873 * Return 0 on success, negative on failure
2874 **/
2875 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2876 {
2877 struct pci_dev *pdev = adapter->pdev;
2878 int i, err = 0;
2880 for (i = 0; i < adapter->num_rx_queues; i++) {
2881 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2882 if (err) {
2883 dev_err(&pdev->dev,
2884 "Allocation for Rx Queue %u failed\n", i);
2885 for (i--; i >= 0; i--)
2886 igb_free_rx_resources(adapter->rx_ring[i]);
2887 break;
2888 }
2889 }
2891 return err;
2892 }
2894 /**
2895 * igb_setup_mrqc - configure the multiple receive queue control registers
2896 * @adapter: Board private structure
2897 **/
2898 static void igb_setup_mrqc(struct igb_adapter *adapter)
2899 {
2900 struct e1000_hw *hw = &adapter->hw;
2901 u32 mrqc, rxcsum;
2902 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2903 union e1000_reta {
2904 u32 dword;
2905 u8 bytes[4];
2906 } reta;
2907 static const u8 rsshash[40] = {
2908 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2909 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2910 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2911 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2913 /* Fill out hash function seeds */
2914 for (j = 0; j < 10; j++) {
2915 u32 rsskey = rsshash[(j * 4)];
2916 rsskey |= rsshash[(j * 4) + 1] << 8;
2917 rsskey |= rsshash[(j * 4) + 2] << 16;
2918 rsskey |= rsshash[(j * 4) + 3] << 24;
2919 array_wr32(E1000_RSSRK(0), j, rsskey);
2920 }
2922 num_rx_queues = adapter->rss_queues;
2924 if (adapter->vfs_allocated_count) {
2925 /* 82575 and 82576 support 2 RSS queues for VMDq */
2926 switch (hw->mac.type) {
2927 case e1000_i350:
2928 case e1000_82580:
2929 num_rx_queues = 1;
2930 shift = 0;
2931 break;
2932 case e1000_82576:
2933 shift = 3;
2934 num_rx_queues = 2;
2935 break;
2936 case e1000_82575:
2937 shift = 2;
2938 shift2 = 6;
2939 default:
2940 break;
2941 }
2942 } else {
2943 if (hw->mac.type == e1000_82575)
2944 shift = 6;
2945 }
2947 for (j = 0; j < (32 * 4); j++) {
2948 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2949 if (shift2)
2950 reta.bytes[j & 3] |= num_rx_queues << shift2;
2951 if ((j & 3) == 3)
2952 wr32(E1000_RETA(j >> 2), reta.dword);
2953 }
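/* Example, assuming four RSS queues and shift == 0: the 128 RETA entries
 * cycle 0,1,2,3,0,... so the low bits of the RSS hash spread flows evenly
 * across the four rings. */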
2955 /*
2956 * Disable raw packet checksumming so that RSS hash is placed in
2957 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2958 * offloads as they are enabled by default
2959 */
2960 rxcsum = rd32(E1000_RXCSUM);
2961 rxcsum |= E1000_RXCSUM_PCSD;
2963 if (adapter->hw.mac.type >= e1000_82576)
2964 /* Enable Receive Checksum Offload for SCTP */
2965 rxcsum |= E1000_RXCSUM_CRCOFL;
2967 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2968 wr32(E1000_RXCSUM, rxcsum);
2969 /*
2970 * Generate RSS hash based on TCP port numbers and/or
2971 * IPv4/v6 src and dst addresses since UDP cannot be
2972 * hashed reliably due to IP fragmentation
2973 */
2975 mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
2976 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2977 E1000_MRQC_RSS_FIELD_IPV6 |
2978 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2979 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2981 /* If VMDq is enabled then we set the appropriate mode for that, else
2982 * we default to RSS so that an RSS hash is calculated per packet even
2983 * if we are only using one queue */
2984 if (adapter->vfs_allocated_count) {
2985 if (hw->mac.type > e1000_82575) {
2986 /* Set the default pool for the PF's first queue */
2987 u32 vtctl = rd32(E1000_VT_CTL);
2988 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2989 E1000_VT_CTL_DISABLE_DEF_POOL);
2990 vtctl |= adapter->vfs_allocated_count <<
2991 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2992 wr32(E1000_VT_CTL, vtctl);
2993 }
2994 if (adapter->rss_queues > 1)
2995 mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2996 else
2997 mrqc |= E1000_MRQC_ENABLE_VMDQ;
2998 } else {
2999 if (hw->mac.type != e1000_i211)
3000 mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
3001 }
3002 igb_vmm_control(adapter);
3004 wr32(E1000_MRQC, mrqc);
3005 }
3007 /**
3008 * igb_setup_rctl - configure the receive control registers
3009 * @adapter: Board private structure
3010 **/
3011 void igb_setup_rctl(struct igb_adapter *adapter)
3012 {
3013 struct e1000_hw *hw = &adapter->hw;
3014 u32 rctl;
3016 rctl = rd32(E1000_RCTL);
3018 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3019 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3021 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3022 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3024 /*
3025 * enable stripping of CRC. It's unlikely this will break BMC
3026 * redirection as it did with e1000. Newer features require
3027 * that the HW strips the CRC.
3028 */
3029 rctl |= E1000_RCTL_SECRC;
3031 /* disable store bad packets and clear size bits. */
3032 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3034 /* enable LPE to prevent packets larger than max_frame_size */
3035 rctl |= E1000_RCTL_LPE;
3037 /* disable queue 0 to prevent tail write w/o re-config */
3038 wr32(E1000_RXDCTL(0), 0);
3040 /* Attention!!! For SR-IOV PF driver operations you must enable
3041 * queue drop for all VF and PF queues to prevent head of line blocking
3042 * if an un-trusted VF does not provide descriptors to hardware.
3043 */
3044 if (adapter->vfs_allocated_count) {
3045 /* set all queue drop enable bits */
3046 wr32(E1000_QDE, ALL_QUEUES);
3047 }
3049 /* This is useful for sniffing bad packets. */
3050 if (adapter->netdev->features & NETIF_F_RXALL) {
3051 /* UPE and MPE will be handled by normal PROMISC logic
3052 * in e1000e_set_rx_mode */
3053 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3054 E1000_RCTL_BAM | /* RX All Bcast Pkts */
3055 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3057 rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3058 E1000_RCTL_DPF | /* Allow filtered pause */
3059 E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3060 /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3061 * and that breaks VLANs.
3062 */
3063 }
3065 wr32(E1000_RCTL, rctl);
3066 }
3068 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3069 int vfn)
3070 {
3071 struct e1000_hw *hw = &adapter->hw;
3072 u32 vmolr;
3074 /* if it isn't the PF check to see if VFs are enabled and
3075 * increase the size to support vlan tags */
3076 if (vfn < adapter->vfs_allocated_count &&
3077 adapter->vf_data[vfn].vlans_enabled)
3078 size += VLAN_TAG_SIZE;
3080 vmolr = rd32(E1000_VMOLR(vfn));
3081 vmolr &= ~E1000_VMOLR_RLPML_MASK;
3082 vmolr |= size | E1000_VMOLR_LPE;
3083 wr32(E1000_VMOLR(vfn), vmolr);
3085 return 0;
3086 }
3088 /**
3089 * igb_rlpml_set - set maximum receive packet size
3090 * @adapter: board private structure
3092 * Configure maximum receivable packet size.
3093 **/
3094 static void igb_rlpml_set(struct igb_adapter *adapter)
3095 {
3096 u32 max_frame_size = adapter->max_frame_size;
3097 struct e1000_hw *hw = &adapter->hw;
3098 u16 pf_id = adapter->vfs_allocated_count;
3100 if (pf_id) {
3101 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3102 /*
3103 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3104 * to our max jumbo frame size, in case we need to enable
3105 * jumbo frames on one of the rings later.
3106 * This will not pass over-length frames into the default
3107 * queue because it's gated by the VMOLR.RLPML.
3108 */
3109 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3110 }
3112 wr32(E1000_RLPML, max_frame_size);
3113 }
3115 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3116 int vfn, bool aupe)
3117 {
3118 struct e1000_hw *hw = &adapter->hw;
3119 u32 vmolr;
3121 /*
3122 * This register exists only on 82576 and newer so if we are older then
3123 * we should exit and do nothing
3124 */
3125 if (hw->mac.type < e1000_82576)
3126 return;
3128 vmolr = rd32(E1000_VMOLR(vfn));
3129 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3130 if (aupe)
3131 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3132 else
3133 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3135 /* clear all bits that might not be set */
3136 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3138 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3139 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3140 /*
3141 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3142 * multicast packets
3143 */
3144 if (vfn <= adapter->vfs_allocated_count)
3145 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3147 wr32(E1000_VMOLR(vfn), vmolr);
3148 }
3150 /**
3151 * igb_configure_rx_ring - Configure a receive ring after Reset
3152 * @adapter: board private structure
3153 * @ring: receive ring to be configured
3155 * Configure the Rx unit of the MAC after a reset.
3156 **/
3157 void igb_configure_rx_ring(struct igb_adapter *adapter,
3158 struct igb_ring *ring)
3159 {
3160 struct e1000_hw *hw = &adapter->hw;
3161 u64 rdba = ring->dma;
3162 int reg_idx = ring->reg_idx;
3163 u32 srrctl = 0, rxdctl = 0;
3165 /* disable the queue */
3166 wr32(E1000_RXDCTL(reg_idx), 0);
3168 /* Set DMA base address registers */
3169 wr32(E1000_RDBAL(reg_idx),
3170 rdba & 0x00000000ffffffffULL);
3171 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3172 wr32(E1000_RDLEN(reg_idx),
3173 ring->count * sizeof(union e1000_adv_rx_desc));
3175 /* initialize head and tail */
3176 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3177 wr32(E1000_RDH(reg_idx), 0);
3178 writel(0, ring->tail);
3180 /* set descriptor configuration */
3181 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3182 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3183 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3184 #else
3185 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3186 #endif
3188 if (hw->mac.type >= e1000_82580)
3189 srrctl |= E1000_SRRCTL_TIMESTAMP;
3190 /* Only set Drop Enable if we are supporting multiple queues */
3191 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3192 srrctl |= E1000_SRRCTL_DROP_EN;
3194 wr32(E1000_SRRCTL(reg_idx), srrctl);
3196 /* set filtering for VMDQ pools */
3197 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3199 rxdctl |= IGB_RX_PTHRESH;
3200 rxdctl |= IGB_RX_HTHRESH << 8;
3201 rxdctl |= IGB_RX_WTHRESH << 16;
3203 /* enable receive descriptor fetching */
3204 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3205 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3206 }
3208 /**
3209 * igb_configure_rx - Configure receive Unit after Reset
3210 * @adapter: board private structure
3212 * Configure the Rx unit of the MAC after a reset.
3213 **/
3214 static void igb_configure_rx(struct igb_adapter *adapter)
3215 {
3216 int i;
3218 /* set UTA to appropriate mode */
3219 igb_set_uta(adapter);
3221 /* set the correct pool for the PF default MAC address in entry 0 */
3222 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3223 adapter->vfs_allocated_count);
3225 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3226 * the Base and Length of the Rx Descriptor Ring */
3227 for (i = 0; i < adapter->num_rx_queues; i++)
3228 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3229 }
3231 /**
3232 * igb_free_tx_resources - Free Tx Resources per Queue
3233 * @tx_ring: Tx descriptor ring for a specific queue
3235 * Free all transmit software resources
3236 **/
3237 void igb_free_tx_resources(struct igb_ring *tx_ring)
3238 {
3239 igb_clean_tx_ring(tx_ring);
3241 vfree(tx_ring->tx_buffer_info);
3242 tx_ring->tx_buffer_info = NULL;
3244 /* if not set, then don't free */
3245 if (!tx_ring->desc)
3246 return;
3248 dma_free_coherent(tx_ring->dev, tx_ring->size,
3249 tx_ring->desc, tx_ring->dma);
3251 tx_ring->desc = NULL;
3252 }
3254 /**
3255 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3256 * @adapter: board private structure
3258 * Free all transmit software resources
3259 **/
3260 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3261 {
3262 int i;
3264 for (i = 0; i < adapter->num_tx_queues; i++)
3265 igb_free_tx_resources(adapter->tx_ring[i]);
3266 }
3268 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3269 struct igb_tx_buffer *tx_buffer)
3270 {
3271 if (tx_buffer->skb) {
3272 dev_kfree_skb_any(tx_buffer->skb);
3273 if (tx_buffer->dma)
3274 dma_unmap_single(ring->dev,
3275 tx_buffer->dma,
3276 tx_buffer->length,
3277 DMA_TO_DEVICE);
3278 } else if (tx_buffer->dma) {
3279 dma_unmap_page(ring->dev,
3280 tx_buffer->dma,
3281 tx_buffer->length,
3282 DMA_TO_DEVICE);
3283 }
3284 tx_buffer->next_to_watch = NULL;
3285 tx_buffer->skb = NULL;
3286 tx_buffer->dma = 0;
3287 /* buffer_info must be completely set up in the transmit path */
3288 }
3290 /**
3291 * igb_clean_tx_ring - Free Tx Buffers
3292 * @tx_ring: ring to be cleaned
3293 **/
3294 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3295 {
3296 struct igb_tx_buffer *buffer_info;
3297 unsigned long size;
3298 u16 i;
3300 if (!tx_ring->tx_buffer_info)
3301 return;
3302 /* Free all the Tx ring sk_buffs */
3304 for (i = 0; i < tx_ring->count; i++) {
3305 buffer_info = &tx_ring->tx_buffer_info[i];
3306 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3307 }
3309 netdev_tx_reset_queue(txring_txq(tx_ring));
3311 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3312 memset(tx_ring->tx_buffer_info, 0, size);
3314 /* Zero out the descriptor ring */
3315 memset(tx_ring->desc, 0, tx_ring->size);
3317 tx_ring->next_to_use = 0;
3318 tx_ring->next_to_clean = 0;
3319 }
3321 /**
3322 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3323 * @adapter: board private structure
3324 **/
3325 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3326 {
3327 int i;
3329 for (i = 0; i < adapter->num_tx_queues; i++)
3330 igb_clean_tx_ring(adapter->tx_ring[i]);
3331 }
3333 /**
3334 * igb_free_rx_resources - Free Rx Resources
3335 * @rx_ring: ring to clean the resources from
3337 * Free all receive software resources
3338 **/
3339 void igb_free_rx_resources(struct igb_ring *rx_ring)
3340 {
3341 igb_clean_rx_ring(rx_ring);
3343 vfree(rx_ring->rx_buffer_info);
3344 rx_ring->rx_buffer_info = NULL;
3346 /* if not set, then don't free */
3347 if (!rx_ring->desc)
3348 return;
3350 dma_free_coherent(rx_ring->dev, rx_ring->size,
3351 rx_ring->desc, rx_ring->dma);
3353 rx_ring->desc = NULL;
3354 }
3356 /**
3357 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3358 * @adapter: board private structure
3360 * Free all receive software resources
3361 **/
3362 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3363 {
3364 int i;
3366 for (i = 0; i < adapter->num_rx_queues; i++)
3367 igb_free_rx_resources(adapter->rx_ring[i]);
3368 }
3370 /**
3371 * igb_clean_rx_ring - Free Rx Buffers per Queue
3372 * @rx_ring: ring to free buffers from
3373 **/
3374 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3375 {
3376 unsigned long size;
3377 u16 i;
3379 if (!rx_ring->rx_buffer_info)
3380 return;
3382 /* Free all the Rx ring sk_buffs */
3383 for (i = 0; i < rx_ring->count; i++) {
3384 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3385 if (buffer_info->dma) {
3386 dma_unmap_single(rx_ring->dev,
3387 buffer_info->dma,
3388 IGB_RX_HDR_LEN,
3389 DMA_FROM_DEVICE);
3390 buffer_info->dma = 0;
3391 }
3393 if (buffer_info->skb) {
3394 dev_kfree_skb(buffer_info->skb);
3395 buffer_info->skb = NULL;
3397 if (buffer_info->page_dma) {
3398 dma_unmap_page(rx_ring->dev,
3399 buffer_info->page_dma,
3400 PAGE_SIZE / 2,
3401 DMA_FROM_DEVICE);
3402 buffer_info->page_dma = 0;
3403 }
3404 if (buffer_info->page) {
3405 put_page(buffer_info->page);
3406 buffer_info->page = NULL;
3407 buffer_info->page_offset = 0;
3408 }
3409 }
3411 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3412 memset(rx_ring->rx_buffer_info, 0, size);
3414 /* Zero out the descriptor ring */
3415 memset(rx_ring->desc, 0, rx_ring->size);
3417 rx_ring->next_to_clean = 0;
3418 rx_ring->next_to_use = 0;
3419 }
3421 /**
3422 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3423 * @adapter: board private structure
3424 **/
3425 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3426 {
3427 int i;
3429 for (i = 0; i < adapter->num_rx_queues; i++)
3430 igb_clean_rx_ring(adapter->rx_ring[i]);
3431 }
3433 /**
3434 * igb_set_mac - Change the Ethernet Address of the NIC
3435 * @netdev: network interface device structure
3436 * @p: pointer to an address structure
3438 * Returns 0 on success, negative on failure
3439 **/
3440 static int igb_set_mac(struct net_device *netdev, void *p)
3441 {
3442 struct igb_adapter *adapter = netdev_priv(netdev);
3443 struct e1000_hw *hw = &adapter->hw;
3444 struct sockaddr *addr = p;
3446 if (!is_valid_ether_addr(addr->sa_data))
3447 return -EADDRNOTAVAIL;
3449 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3450 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3452 /* set the correct pool for the new PF MAC address in entry 0 */
3453 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3454 adapter->vfs_allocated_count);
3456 return 0;
3457 }
3459 /**
3460 * igb_write_mc_addr_list - write multicast addresses to MTA
3461 * @netdev: network interface device structure
3463 * Writes multicast address list to the MTA hash table.
3464 * Returns: -ENOMEM on failure
3465 * 0 on no addresses written
3466 *           X on writing X addresses to MTA
3467 **/
3468 static int igb_write_mc_addr_list(struct net_device *netdev)
3469 {
3470 struct igb_adapter *adapter = netdev_priv(netdev);
3471 struct e1000_hw *hw = &adapter->hw;
3472 struct netdev_hw_addr *ha;
3473 u8 *mta_list;
3474 int i;
3476 if (netdev_mc_empty(netdev)) {
3477 /* nothing to program, so clear mc list */
3478 igb_update_mc_addr_list(hw, NULL, 0);
3479 igb_restore_vf_multicasts(adapter);
3480 return 0;
3481 }
3483 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3484 if (!mta_list)
3485 return -ENOMEM;
3487 /* The shared function expects a packed array of only addresses. */
3488 i = 0;
3489 netdev_for_each_mc_addr(ha, netdev)
3490 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3492 igb_update_mc_addr_list(hw, mta_list, i);
3493 kfree(mta_list);
3495 return netdev_mc_count(netdev);
3496 }
3498 /**
3499 * igb_write_uc_addr_list - write unicast addresses to RAR table
3500 * @netdev: network interface device structure
3502 * Writes unicast address list to the RAR table.
3503 * Returns: -ENOMEM on failure/insufficient address space
3504 * 0 on no addresses written
3505 *           X on writing X addresses to the RAR table
3506 **/
3507 static int igb_write_uc_addr_list(struct net_device *netdev)
3508 {
3509 struct igb_adapter *adapter = netdev_priv(netdev);
3510 struct e1000_hw *hw = &adapter->hw;
3511 unsigned int vfn = adapter->vfs_allocated_count;
3512 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3513 int count = 0;
3515 /* return ENOMEM indicating insufficient memory for addresses */
3516 if (netdev_uc_count(netdev) > rar_entries)
3517 return -ENOMEM;
3519 if (!netdev_uc_empty(netdev) && rar_entries) {
3520 struct netdev_hw_addr *ha;
3522 netdev_for_each_uc_addr(ha, netdev) {
3523 if (!rar_entries)
3524 break;
3525 igb_rar_set_qsel(adapter, ha->addr,
3526 rar_entries--,
3527 vfn);
3528 count++;
3529 }
3530 }
3531 /* write the addresses in reverse order to avoid write combining */
3532 for (; rar_entries > 0 ; rar_entries--) {
3533 wr32(E1000_RAH(rar_entries), 0);
3534 wr32(E1000_RAL(rar_entries), 0);
3535 }
3536 wrfl();
3538 return count;
3539 }
3541 /**
3542 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3543 * @netdev: network interface device structure
3545 * The set_rx_mode entry point is called whenever the unicast or multicast
3546 * address lists or the network interface flags are updated. This routine is
3547 * responsible for configuring the hardware for proper unicast, multicast,
3548 * promiscuous mode, and all-multi behavior.
3549 **/
3550 static void igb_set_rx_mode(struct net_device *netdev)
3551 {
3552 struct igb_adapter *adapter = netdev_priv(netdev);
3553 struct e1000_hw *hw = &adapter->hw;
3554 unsigned int vfn = adapter->vfs_allocated_count;
3555 u32 rctl, vmolr = 0;
3556 int count;
3558 /* Check for Promiscuous and All Multicast modes */
3559 rctl = rd32(E1000_RCTL);
3561 /* clear the affected bits */
3562 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3564 if (netdev->flags & IFF_PROMISC) {
3565 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3566 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3567 } else {
3568 if (netdev->flags & IFF_ALLMULTI) {
3569 rctl |= E1000_RCTL_MPE;
3570 vmolr |= E1000_VMOLR_MPME;
3571 } else {
3572 /*
3573 * Write addresses to the MTA, if the attempt fails
3574 * then we should just turn on promiscuous mode so
3575 * that we can at least receive multicast traffic
3576 */
3577 count = igb_write_mc_addr_list(netdev);
3578 if (count < 0) {
3579 rctl |= E1000_RCTL_MPE;
3580 vmolr |= E1000_VMOLR_MPME;
3581 } else if (count) {
3582 vmolr |= E1000_VMOLR_ROMPE;
3583 }
3584 }
3585 /*
3586 * Write addresses to available RAR registers, if there is not
3587 * sufficient space to store all the addresses then enable
3588 * unicast promiscuous mode
3589 */
3590 count = igb_write_uc_addr_list(netdev);
3591 if (count < 0) {
3592 rctl |= E1000_RCTL_UPE;
3593 vmolr |= E1000_VMOLR_ROPE;
3594 }
3595 rctl |= E1000_RCTL_VFE;
3596 }
3597 wr32(E1000_RCTL, rctl);
3599 /*
3600 * In order to support SR-IOV and eventually VMDq it is necessary to set
3601 * the VMOLR to enable the appropriate modes. Without this workaround
3602 * we will have issues with VLAN tag stripping not being done for frames
3603 * that are only arriving because we are the default pool
3604 */
3605 if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
3606 return;
3608 vmolr |= rd32(E1000_VMOLR(vfn)) &
3609 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3610 wr32(E1000_VMOLR(vfn), vmolr);
3611 igb_restore_vf_multicasts(adapter);
3612 }
3614 static void igb_check_wvbr(struct igb_adapter *adapter)
3615 {
3616 struct e1000_hw *hw = &adapter->hw;
3617 u32 wvbr = 0;
3619 switch (hw->mac.type) {
3620 case e1000_82576:
3621 case e1000_i350:
3622 if (!(wvbr = rd32(E1000_WVBR)))
3623 return;
3624 break;
3625 default:
3626 break;
3627 }
3629 adapter->wvbr |= wvbr;
3630 }
3632 #define IGB_STAGGERED_QUEUE_OFFSET 8
3634 static void igb_spoof_check(struct igb_adapter *adapter)
3635 {
3636 int j;
3638 if (!adapter->wvbr)
3639 return;
3641 for (j = 0; j < adapter->vfs_allocated_count; j++) {
3642 if (adapter->wvbr & (1 << j) ||
3643 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3644 dev_warn(&adapter->pdev->dev,
3645 "Spoof event(s) detected on VF %d\n", j);
3646 adapter->wvbr &=
3647 ~((1 << j) |
3648 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3649 }
3650 }
3651 }
3653 /* Need to wait a few seconds after link up to get diagnostic information from
3654 * the phy */
3655 static void igb_update_phy_info(unsigned long data)
3656 {
3657 struct igb_adapter *adapter = (struct igb_adapter *) data;
3658 igb_get_phy_info(&adapter->hw);
3659 }
3661 /**
3662 * igb_has_link - check shared code for link and determine up/down
3663 * @adapter: pointer to driver private info
3664 **/
3665 bool igb_has_link(struct igb_adapter *adapter)
3666 {
3667 struct e1000_hw *hw = &adapter->hw;
3668 bool link_active = false;
3669 s32 ret_val = 0;
3671 /* get_link_status is set on LSC (link status) interrupt or
3672 * rx sequence error interrupt. get_link_status will stay
3673 * false until the e1000_check_for_link establishes link
3674 * for copper adapters ONLY
3675 */
3676 switch (hw->phy.media_type) {
3677 case e1000_media_type_copper:
3678 if (hw->mac.get_link_status) {
3679 ret_val = hw->mac.ops.check_for_link(hw);
3680 link_active = !hw->mac.get_link_status;
3681 } else {
3682 link_active = true;
3683 }
3684 break;
3685 case e1000_media_type_internal_serdes:
3686 ret_val = hw->mac.ops.check_for_link(hw);
3687 link_active = hw->mac.serdes_has_link;
3688 break;
3689 default:
3690 case e1000_media_type_unknown:
3691 break;
3692 }
3694 return link_active;
3695 }
3697 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3698 {
3699 bool ret = false;
3700 u32 ctrl_ext, thstat;
3702 /* check for thermal sensor event on i350 copper only */
3703 if (hw->mac.type == e1000_i350) {
3704 thstat = rd32(E1000_THSTAT);
3705 ctrl_ext = rd32(E1000_CTRL_EXT);
3707 if ((hw->phy.media_type == e1000_media_type_copper) &&
3708 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3709 ret = !!(thstat & event);
3710 }
3711 }
3713 return ret;
3714 }
3716 /**
3717 * igb_watchdog - Timer Call-back
3718 * @data: pointer to adapter cast into an unsigned long
3719 **/
3720 static void igb_watchdog(unsigned long data)
3721 {
3722 struct igb_adapter *adapter = (struct igb_adapter *)data;
3723 /* Do the rest outside of interrupt context */
3724 schedule_work(&adapter->watchdog_task);
3725 }
3727 static void igb_watchdog_task(struct work_struct *work)
3728 {
3729 struct igb_adapter *adapter = container_of(work,
3730 struct igb_adapter,
3731 watchdog_task);
3732 struct e1000_hw *hw = &adapter->hw;
3733 struct net_device *netdev = adapter->netdev;
3734 u32 link;
3735 int i;
3737 link = igb_has_link(adapter);
3738 if (link) {
3739 /* Cancel scheduled suspend requests. */
3740 pm_runtime_resume(netdev->dev.parent);
3742 if (!netif_carrier_ok(netdev)) {
3743 u32 ctrl;
3744 hw->mac.ops.get_speed_and_duplex(hw,
3745 &adapter->link_speed,
3746 &adapter->link_duplex);
3748 ctrl = rd32(E1000_CTRL);
3749 /* Links status message must follow this format */
3750 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3751 "Duplex, Flow Control: %s\n",
3753 adapter->link_speed,
3754 adapter->link_duplex == FULL_DUPLEX ?
3756 (ctrl & E1000_CTRL_TFCE) &&
3757 (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3758 (ctrl & E1000_CTRL_RFCE) ? "RX" :
3759 (ctrl & E1000_CTRL_TFCE) ? "TX" : "None");
3761 /* check for thermal sensor event */
3762 if (igb_thermal_sensor_event(hw,
3763 E1000_THSTAT_LINK_THROTTLE)) {
3764 netdev_info(netdev, "The network adapter link "
3765 "speed was downshifted because it "
3769 /* adjust timeout factor according to speed/duplex */
3770 adapter->tx_timeout_factor = 1;
3771 switch (adapter->link_speed) {
3772 case SPEED_10:
3773 adapter->tx_timeout_factor = 14;
3774 break;
3775 case SPEED_100:
3776 /* maybe add some timeout factor ? */
3777 break;
3778 }
3780 netif_carrier_on(netdev);
3782 igb_ping_all_vfs(adapter);
3783 igb_check_vf_rate_limit(adapter);
3785 /* link state has changed, schedule phy info update */
3786 if (!test_bit(__IGB_DOWN, &adapter->state))
3787 mod_timer(&adapter->phy_info_timer,
3788 round_jiffies(jiffies + 2 * HZ));
3791 if (netif_carrier_ok(netdev)) {
3792 adapter->link_speed = 0;
3793 adapter->link_duplex = 0;
3795 /* check for thermal sensor event */
3796 if (igb_thermal_sensor_event(hw,
3797 E1000_THSTAT_PWR_DOWN)) {
3798 netdev_err(netdev, "The network adapter was "
3799 "stopped because it overheated\n");
3802 /* Links status message must follow this format */
3803 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3805 netif_carrier_off(netdev);
3807 igb_ping_all_vfs(adapter);
3809 /* link state has changed, schedule phy info update */
3810 if (!test_bit(__IGB_DOWN, &adapter->state))
3811 mod_timer(&adapter->phy_info_timer,
3812 round_jiffies(jiffies + 2 * HZ));
3814 pm_schedule_suspend(netdev->dev.parent,
3815 MSEC_PER_SEC * 5);
3816 }
3817 }
3819 spin_lock(&adapter->stats64_lock);
3820 igb_update_stats(adapter, &adapter->stats64);
3821 spin_unlock(&adapter->stats64_lock);
3823 for (i = 0; i < adapter->num_tx_queues; i++) {
3824 struct igb_ring *tx_ring = adapter->tx_ring[i];
3825 if (!netif_carrier_ok(netdev)) {
3826 /* We've lost link, so the controller stops DMA,
3827 * but we've got queued Tx work that's never going
3828 * to get done, so reset controller to flush Tx.
3829 * (Do the reset outside of interrupt context). */
3830 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3831 adapter->tx_timeout_count++;
3832 schedule_work(&adapter->reset_task);
3833 /* return immediately since reset is imminent */
3834 return;
3835 }
3836 }
3838 /* Force detection of hung controller every watchdog period */
3839 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3840 }
3842 /* Cause software interrupt to ensure rx ring is cleaned */
3843 if (adapter->msix_entries) {
3844 u32 eics = 0;
3845 for (i = 0; i < adapter->num_q_vectors; i++)
3846 eics |= adapter->q_vector[i]->eims_value;
3847 wr32(E1000_EICS, eics);
3848 } else {
3849 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3850 }
3852 igb_spoof_check(adapter);
3854 /* Reset the timer */
3855 if (!test_bit(__IGB_DOWN, &adapter->state))
3856 mod_timer(&adapter->watchdog_timer,
3857 round_jiffies(jiffies + 2 * HZ));
3858 }
3860 enum latency_range {
3861 lowest_latency = 0,
3862 low_latency = 1,
3863 bulk_latency = 2,
3864 latency_invalid = 255
3865 };
3867 /**
3868 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3870 * Stores a new ITR value based on strictly on packet size. This
3871 * algorithm is less sophisticated than that used in igb_update_itr,
3872 * due to the difficulty of synchronizing statistics across multiple
3873 * receive rings. The divisors and thresholds used by this function
3874 * were determined based on theoretical maximum wire speed and testing
3875 * data, in order to minimize response time while increasing bulk
3876 * throughput.
3877 * This functionality is controlled by the InterruptThrottleRate module
3878 * parameter (see igb_param.c)
3879 * NOTE: This function is called only when operating in a multiqueue
3880 * receive environment.
3881 * @q_vector: pointer to q_vector
3882 **/
3883 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3884 {
3885 int new_val = q_vector->itr_val;
3886 int avg_wire_size = 0;
3887 struct igb_adapter *adapter = q_vector->adapter;
3888 unsigned int packets;
3890 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3891 * ints/sec - ITR timer value of 120 ticks.
3892 */
3893 if (adapter->link_speed != SPEED_1000) {
3894 new_val = IGB_4K_ITR;
3895 goto set_itr_val;
3896 }
3898 packets = q_vector->rx.total_packets;
3899 if (packets)
3900 avg_wire_size = q_vector->rx.total_bytes / packets;
3902 packets = q_vector->tx.total_packets;
3903 if (packets)
3904 avg_wire_size = max_t(u32, avg_wire_size,
3905 q_vector->tx.total_bytes / packets);
3907 /* if avg_wire_size isn't set no work was done */
3908 if (!avg_wire_size)
3909 goto clear_counts;
3911 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3912 avg_wire_size += 24;
3914 /* Don't starve jumbo frames */
3915 avg_wire_size = min(avg_wire_size, 3000);
3917 /* Give a little boost to mid-size frames */
3918 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3919 new_val = avg_wire_size / 3;
3920 else
3921 new_val = avg_wire_size / 2;
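/* e.g. 1500-byte frames give (1500 + 24) / 2 = 762, while 800-byte
 * frames take the mid-size boost and give (800 + 24) / 3 = 274 */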
3923 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3924 if (new_val < IGB_20K_ITR &&
3925 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3926 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3927 new_val = IGB_20K_ITR;
3929 set_itr_val:
3930 if (new_val != q_vector->itr_val) {
3931 q_vector->itr_val = new_val;
3932 q_vector->set_itr = 1;
3933 }
3934 clear_counts:
3935 q_vector->rx.total_bytes = 0;
3936 q_vector->rx.total_packets = 0;
3937 q_vector->tx.total_bytes = 0;
3938 q_vector->tx.total_packets = 0;
3939 }
3941 /**
3942 * igb_update_itr - update the dynamic ITR value based on statistics
3943 * Stores a new ITR value based on packets and byte
3944 * counts during the last interrupt. The advantage of per interrupt
3945 * computation is faster updates and more accurate ITR for the current
3946 * traffic pattern. Constants in this function were computed
3947 * based on theoretical maximum wire speed and thresholds were set based
3948 * on testing data as well as attempting to minimize response time
3949 * while increasing bulk throughput.
3950 * this functionality is controlled by the InterruptThrottleRate module
3951 * parameter (see igb_param.c)
3952 * NOTE: These calculations are only valid when operating in a single-
3953 * queue environment.
3954 * @q_vector: pointer to q_vector
3955 * @ring_container: ring info to update the itr for
3956 **/
3957 static void igb_update_itr(struct igb_q_vector *q_vector,
3958 struct igb_ring_container *ring_container)
3959 {
3960 unsigned int packets = ring_container->total_packets;
3961 unsigned int bytes = ring_container->total_bytes;
3962 u8 itrval = ring_container->itr;
3964 /* no packets, exit with status unchanged */
3965 if (packets == 0)
3966 return;
3968 switch (itrval) {
3969 case lowest_latency:
3970 /* handle TSO and jumbo frames */
3971 if (bytes/packets > 8000)
3972 itrval = bulk_latency;
3973 else if ((packets < 5) && (bytes > 512))
3974 itrval = low_latency;
3975 break;
3976 case low_latency: /* 50 usec aka 20000 ints/s */
3977 if (bytes > 10000) {
3978 /* this if handles the TSO accounting */
3979 if (bytes/packets > 8000) {
3980 itrval = bulk_latency;
3981 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3982 itrval = bulk_latency;
3983 } else if ((packets > 35)) {
3984 itrval = lowest_latency;
3986 } else if (bytes/packets > 2000) {
3987 itrval = bulk_latency;
3988 } else if (packets <= 2 && bytes < 512) {
3989 itrval = lowest_latency;
3990 }
3991 break;
3992 case bulk_latency: /* 250 usec aka 4000 ints/s */
3993 if (bytes > 25000) {
3994 if (packets > 35)
3995 itrval = low_latency;
3996 } else if (bytes < 1500) {
3997 itrval = low_latency;
3998 }
3999 break;
4000 }
4002 /* clear work counters since we have the values we need */
4003 ring_container->total_bytes = 0;
4004 ring_container->total_packets = 0;
4006 /* write updated itr to ring container */
4007 ring_container->itr = itrval;
4008 }
4010 static void igb_set_itr(struct igb_q_vector *q_vector)
4011 {
4012 struct igb_adapter *adapter = q_vector->adapter;
4013 u32 new_itr = q_vector->itr_val;
4014 u8 current_itr = 0;
4016 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4017 if (adapter->link_speed != SPEED_1000) {
4018 current_itr = 0;
4019 new_itr = IGB_4K_ITR;
4020 goto set_itr_now;
4021 }
4023 igb_update_itr(q_vector, &q_vector->tx);
4024 igb_update_itr(q_vector, &q_vector->rx);
4026 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4028 /* conservative mode (itr 3) eliminates the lowest_latency setting */
4029 if (current_itr == lowest_latency &&
4030 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4031 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4032 current_itr = low_latency;
4034 switch (current_itr) {
4035 /* counts and packets in update_itr are dependent on these numbers */
4036 case lowest_latency:
4037 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4038 break;
4039 case low_latency:
4040 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4041 break;
4042 case bulk_latency:
4043 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
4044 break;
4045 default:
4046 break;
4047 }
4049 set_itr_now:
4050 if (new_itr != q_vector->itr_val) {
4051 /* this attempts to bias the interrupt rate towards Bulk
4052 * by adding intermediate steps when interrupt rate is
4053 * increasing */
4054 new_itr = new_itr > q_vector->itr_val ?
4055 max((new_itr * q_vector->itr_val) /
4056 (new_itr + (q_vector->itr_val >> 2)),
4057 new_itr) :
4058 new_itr;
4059 /* Don't write the value here; it resets the adapter's
4060 * internal timer, and causes us to delay far longer than
4061 * we should between interrupts. Instead, we write the ITR
4062 * value at the beginning of the next interrupt so the timing
4063 * ends up being correct.
4065 q_vector->itr_val = new_itr;
4066 q_vector->set_itr = 1;
4067 }
4068 }
4070 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4071 u32 type_tucmd, u32 mss_l4len_idx)
4072 {
4073 struct e1000_adv_tx_context_desc *context_desc;
4074 u16 i = tx_ring->next_to_use;
4076 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4078 i++;
4079 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4081 /* set bits to identify this as an advanced context descriptor */
4082 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
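/* A context descriptor carries no packet data; it programs the offload
 * engine with the header lengths, MSS and checksum commands that the
 * following data descriptors on this ring refer to. */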
4084 /* For 82575, context index must be unique per ring. */
4085 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4086 mss_l4len_idx |= tx_ring->reg_idx << 4;
4088 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4089 context_desc->seqnum_seed = 0;
4090 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4091 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4092 }
4094 static int igb_tso(struct igb_ring *tx_ring,
4095 struct igb_tx_buffer *first,
4096 u8 *hdr_len)
4097 {
4098 struct sk_buff *skb = first->skb;
4099 u32 vlan_macip_lens, type_tucmd;
4100 u32 mss_l4len_idx, l4len;
4102 if (!skb_is_gso(skb))
4103 return 0;
4105 if (skb_header_cloned(skb)) {
4106 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4107 if (err)
4108 return err;
4109 }
4111 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4112 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4114 if (first->protocol == __constant_htons(ETH_P_IP)) {
4115 struct iphdr *iph = ip_hdr(skb);
4116 iph->tot_len = 0;
4117 iph->check = 0;
4118 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4119 iph->daddr, 0,
4120 IPPROTO_TCP,
4121 0);
4122 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4123 first->tx_flags |= IGB_TX_FLAGS_TSO |
4124 IGB_TX_FLAGS_CSUM |
4125 IGB_TX_FLAGS_IPV4;
4126 } else if (skb_is_gso_v6(skb)) {
4127 ipv6_hdr(skb)->payload_len = 0;
4128 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4129 &ipv6_hdr(skb)->daddr,
4130 0, IPPROTO_TCP, 0);
4131 first->tx_flags |= IGB_TX_FLAGS_TSO |
4132 IGB_TX_FLAGS_CSUM;
4133 }
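/* The checksums rewritten above are pseudo-header sums computed with a
 * zero length, which is the seed the hardware expects when it fills in
 * the per-segment TCP checksums during segmentation. */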
4135 /* compute header lengths */
4136 l4len = tcp_hdrlen(skb);
4137 *hdr_len = skb_transport_offset(skb) + l4len;
4139 /* update gso size and bytecount with header size */
4140 first->gso_segs = skb_shinfo(skb)->gso_segs;
4141 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4143 /* MSS L4LEN IDX */
4144 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4145 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4147 /* VLAN MACLEN IPLEN */
4148 vlan_macip_lens = skb_network_header_len(skb);
4149 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4150 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4152 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4154 return 1;
4155 }
4157 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4158 {
4159 struct sk_buff *skb = first->skb;
4160 u32 vlan_macip_lens = 0;
4161 u32 mss_l4len_idx = 0;
4162 u32 type_tucmd = 0;
4164 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4165 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4166 return;
4167 } else {
4168 u8 l4_hdr = 0;
4169 switch (first->protocol) {
4170 case __constant_htons(ETH_P_IP):
4171 vlan_macip_lens |= skb_network_header_len(skb);
4172 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4173 l4_hdr = ip_hdr(skb)->protocol;
4174 break;
4175 case __constant_htons(ETH_P_IPV6):
4176 vlan_macip_lens |= skb_network_header_len(skb);
4177 l4_hdr = ipv6_hdr(skb)->nexthdr;
4178 break;
4179 default:
4180 if (unlikely(net_ratelimit())) {
4181 dev_warn(tx_ring->dev,
4182 "partial checksum but proto=%x!\n",
4190 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4191 mss_l4len_idx = tcp_hdrlen(skb) <<
4192 E1000_ADVTXD_L4LEN_SHIFT;
4195 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4196 mss_l4len_idx = sizeof(struct sctphdr) <<
4197 E1000_ADVTXD_L4LEN_SHIFT;
4200 mss_l4len_idx = sizeof(struct udphdr) <<
4201 E1000_ADVTXD_L4LEN_SHIFT;
4204 if (unlikely(net_ratelimit())) {
4205 dev_warn(tx_ring->dev,
4206 "partial checksum but l4 proto=%x!\n",
4212 /* update TX checksum flag */
4213 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4216 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4217 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4219 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4220 }
4222 static __le32 igb_tx_cmd_type(u32 tx_flags)
4223 {
4224 /* set type for advanced descriptor with frame checksum insertion */
4225 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4226 E1000_ADVTXD_DCMD_IFCS |
4227 E1000_ADVTXD_DCMD_DEXT);
4229 /* set HW vlan bit if vlan is present */
4230 if (tx_flags & IGB_TX_FLAGS_VLAN)
4231 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4233 /* set timestamp bit if present */
4234 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4235 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4237 /* set segmentation bits for TSO */
4238 if (tx_flags & IGB_TX_FLAGS_TSO)
4239 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4241 return cmd_type;
4242 }
4244 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4245 union e1000_adv_tx_desc *tx_desc,
4246 u32 tx_flags, unsigned int paylen)
4247 {
4248 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4250 /* 82575 requires a unique index per ring if any offload is enabled */
4251 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4252 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4253 olinfo_status |= tx_ring->reg_idx << 4;
4255 /* insert L4 checksum */
4256 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4257 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4259 /* insert IPv4 checksum */
4260 if (tx_flags & IGB_TX_FLAGS_IPV4)
4261 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4262 }
4264 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4265 }
4267 /*
4268 * The largest size we can write to the descriptor is 65535. In order to
4269 * maintain a power of two alignment we have to limit ourselves to 32K.
4270 */
4271 #define IGB_MAX_TXD_PWR 15
4272 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
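/* i.e. 1 << 15 = 32768 bytes per data descriptor; any buffer larger than
 * 32K is split across several descriptors by the loop in igb_tx_map()
 * below. */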
4274 static void igb_tx_map(struct igb_ring *tx_ring,
4275 struct igb_tx_buffer *first,
4276 const u8 hdr_len)
4277 {
4278 struct sk_buff *skb = first->skb;
4279 struct igb_tx_buffer *tx_buffer_info;
4280 union e1000_adv_tx_desc *tx_desc;
4281 dma_addr_t dma;
4282 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4283 unsigned int data_len = skb->data_len;
4284 unsigned int size = skb_headlen(skb);
4285 unsigned int paylen = skb->len - hdr_len;
4286 __le32 cmd_type;
4287 u32 tx_flags = first->tx_flags;
4288 u16 i = tx_ring->next_to_use;
4290 tx_desc = IGB_TX_DESC(tx_ring, i);
4292 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4293 cmd_type = igb_tx_cmd_type(tx_flags);
4295 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4296 if (dma_mapping_error(tx_ring->dev, dma))
4297 goto dma_error;
4299 /* record length, and DMA address */
4300 first->length = size;
4302 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4304 for (;;) {
4305 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4306 tx_desc->read.cmd_type_len =
4307 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4309 i++;
4310 tx_desc++;
4311 if (i == tx_ring->count) {
4312 tx_desc = IGB_TX_DESC(tx_ring, 0);
4313 i = 0;
4314 }
4316 dma += IGB_MAX_DATA_PER_TXD;
4317 size -= IGB_MAX_DATA_PER_TXD;
4319 tx_desc->read.olinfo_status = 0;
4320 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4321 }
4323 if (likely(!data_len))
4324 break;
4326 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4328 i++;
4329 tx_desc++;
4330 if (i == tx_ring->count) {
4331 tx_desc = IGB_TX_DESC(tx_ring, 0);
4332 i = 0;
4333 }
4335 size = skb_frag_size(frag);
4336 data_len -= size;
4338 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4339 size, DMA_TO_DEVICE);
4340 if (dma_mapping_error(tx_ring->dev, dma))
4343 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4344 tx_buffer_info->length = size;
4345 tx_buffer_info->dma = dma;
4347 tx_desc->read.olinfo_status = 0;
4348 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4350 frag++;
4351 }
4353 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4355 /* write last descriptor with RS and EOP bits */
4356 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4357 if (unlikely(skb->no_fcs))
4358 cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4359 tx_desc->read.cmd_type_len = cmd_type;
4361 /* set the timestamp */
4362 first->time_stamp = jiffies;
4364 /*
4365 * Force memory writes to complete before letting h/w know there
4366 * are new descriptors to fetch. (Only applicable for weak-ordered
4367 * memory model archs, such as IA-64).
4369 * We also need this memory barrier to make certain all of the
4370 * status bits have been updated before next_to_watch is written.
4371 */
4373 wmb();
4374 /* set next_to_watch value indicating a packet is present */
4375 first->next_to_watch = tx_desc;
4377 i++;
4378 if (i == tx_ring->count)
4379 i = 0;
4381 tx_ring->next_to_use = i;
4383 writel(i, tx_ring->tail);
4385 /* we need this if more than one processor can write to our tail
4386 * at a time, it synchronizes IO on IA64/Altix systems */
4387 mmiowb();
4389 return;
4391 dma_error:
4392 dev_err(tx_ring->dev, "TX DMA map failed\n");
4394 /* clear dma mappings for failed tx_buffer_info map */
4395 for (;;) {
4396 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4397 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4398 if (tx_buffer_info == first)
4399 break;
4400 if (i == 0)
4401 i = tx_ring->count;
4402 i--;
4403 }
4405 tx_ring->next_to_use = i;
4408 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4410 struct net_device *netdev = tx_ring->netdev;
4412 netif_stop_subqueue(netdev, tx_ring->queue_index);
4414 /* Herbert's original patch had:
4415 * smp_mb__after_netif_stop_queue();
4416 * but since that doesn't exist yet, just open code it. */
4417 smp_mb();
4419 /* We need to check again in case another CPU has just
4420 * made room available. */
4421 if (igb_desc_unused(tx_ring) < size)
4422 return -EBUSY;
4424 /* A reprieve! */
4425 netif_wake_subqueue(netdev, tx_ring->queue_index);
4427 u64_stats_update_begin(&tx_ring->tx_syncp2);
4428 tx_ring->tx_stats.restart_queue2++;
4429 u64_stats_update_end(&tx_ring->tx_syncp2);
4434 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4435 {
4436 if (igb_desc_unused(tx_ring) >= size)
4437 return 0;
4438 return __igb_maybe_stop_tx(tx_ring, size);
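/* Editorial note: the stop/wake pair above is the usual lockless TX
 * flow-control pattern -- stop the subqueue, issue a full barrier, then
 * re-check igb_desc_unused(). Without the smp_mb() the completion path
 * could miss the stopped state and never wake the queue. */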
4441 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4442 struct igb_ring *tx_ring)
4444 struct igb_tx_buffer *first;
4445 int tso;
4446 u32 tx_flags = 0;
4447 __be16 protocol = vlan_get_protocol(skb);
4448 u8 hdr_len = 0;
4450 /* need: 1 descriptor per page,
4451 * + 2 desc gap to keep tail from touching head,
4452 * + 1 desc for skb->data,
4453 * + 1 desc for context descriptor,
4454 * otherwise try next time */
4455 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4456 /* this is a hard error */
4457 return NETDEV_TX_BUSY;
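/* Worked example of the "+ 4" headroom (illustrative numbers): a
 * worst-case skb with MAX_SKB_FRAGS fragments (typically 17) needs one
 * descriptor per frag plus one for skb->data, one for the context
 * descriptor and a 2-descriptor gap, i.e. 17 + 4 = 21 free entries. */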
4460 /* record the location of the first descriptor for this packet */
4461 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4462 first->skb = skb;
4463 first->bytecount = skb->len;
4464 first->gso_segs = 1;
4466 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4467 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4468 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4469 }
4471 if (vlan_tx_tag_present(skb)) {
4472 tx_flags |= IGB_TX_FLAGS_VLAN;
4473 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4474 }
4476 /* record initial flags and protocol */
4477 first->tx_flags = tx_flags;
4478 first->protocol = protocol;
4480 tso = igb_tso(tx_ring, first, &hdr_len);
4481 if (tso < 0)
4482 goto out_drop;
4483 else if (!tso)
4484 igb_tx_csum(tx_ring, first);
4486 igb_tx_map(tx_ring, first, hdr_len);
4488 /* Make sure there is space in the ring for the next send. */
4489 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4491 return NETDEV_TX_OK;
4493 out_drop:
4494 igb_unmap_and_free_tx_resource(tx_ring, first);
4496 return NETDEV_TX_OK;
4499 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4500 struct sk_buff *skb)
4502 unsigned int r_idx = skb->queue_mapping;
4504 if (r_idx >= adapter->num_tx_queues)
4505 r_idx = r_idx % adapter->num_tx_queues;
4507 return adapter->tx_ring[r_idx];
4510 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4511 struct net_device *netdev)
4513 struct igb_adapter *adapter = netdev_priv(netdev);
4515 if (test_bit(__IGB_DOWN, &adapter->state)) {
4516 dev_kfree_skb_any(skb);
4517 return NETDEV_TX_OK;
4520 if (skb->len <= 0) {
4521 dev_kfree_skb_any(skb);
4522 return NETDEV_TX_OK;
4523 }
4525 /*
4526 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4527 * in order to meet this minimum size requirement.
4528 */
4529 if (skb->len < 17) {
4530 if (skb_padto(skb, 17))
4531 return NETDEV_TX_OK;
4535 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
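/* Note on the padding above: skb_padto() zeroes the added bytes and
 * frees the skb itself on allocation failure, so returning NETDEV_TX_OK
 * there without a further kfree is the correct error handling. */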
4539 * igb_tx_timeout - Respond to a Tx Hang
4540 * @netdev: network interface device structure
4542 static void igb_tx_timeout(struct net_device *netdev)
4544 struct igb_adapter *adapter = netdev_priv(netdev);
4545 struct e1000_hw *hw = &adapter->hw;
4547 /* Do the reset outside of interrupt context */
4548 adapter->tx_timeout_count++;
4550 if (hw->mac.type >= e1000_82580)
4551 hw->dev_spec._82575.global_device_reset = true;
4553 schedule_work(&adapter->reset_task);
4554 wr32(E1000_EICS,
4555 (adapter->eims_enable_mask & ~adapter->eims_other));
4558 static void igb_reset_task(struct work_struct *work)
4560 struct igb_adapter *adapter;
4561 adapter = container_of(work, struct igb_adapter, reset_task);
4564 netdev_err(adapter->netdev, "Reset adapter\n");
4565 igb_reinit_locked(adapter);
4569 * igb_get_stats64 - Get System Network Statistics
4570 * @netdev: network interface device structure
4571 * @stats: rtnl_link_stats64 pointer
4574 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4575 struct rtnl_link_stats64 *stats)
4577 struct igb_adapter *adapter = netdev_priv(netdev);
4579 spin_lock(&adapter->stats64_lock);
4580 igb_update_stats(adapter, &adapter->stats64);
4581 memcpy(stats, &adapter->stats64, sizeof(*stats));
4582 spin_unlock(&adapter->stats64_lock);
4584 return stats;
4585 }
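/* Editorial note: stats64_lock serializes writers of the software stats
 * (the watchdog and this getter both call igb_update_stats()); the
 * per-ring counters read inside still use the u64_stats seqcounts. */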
4588 * igb_change_mtu - Change the Maximum Transfer Unit
4589 * @netdev: network interface device structure
4590 * @new_mtu: new value for maximum frame size
4592 * Returns 0 on success, negative on failure
4594 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4596 struct igb_adapter *adapter = netdev_priv(netdev);
4597 struct pci_dev *pdev = adapter->pdev;
4598 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4600 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4601 dev_err(&pdev->dev, "Invalid MTU setting\n");
4605 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4606 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4607 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4611 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4612 msleep(1);
4614 /* igb_down has a dependency on max_frame_size */
4615 adapter->max_frame_size = max_frame;
4617 if (netif_running(netdev))
4618 igb_down(adapter);
4620 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4621 netdev->mtu, new_mtu);
4622 netdev->mtu = new_mtu;
4624 if (netif_running(netdev))
4625 igb_up(adapter);
4626 else
4627 igb_reset(adapter);
4629 clear_bit(__IGB_RESETTING, &adapter->state);
4631 return 0;
4632 }
4635 * igb_update_stats - Update the board statistics counters
4636 * @adapter: board private structure
4639 void igb_update_stats(struct igb_adapter *adapter,
4640 struct rtnl_link_stats64 *net_stats)
4642 struct e1000_hw *hw = &adapter->hw;
4643 struct pci_dev *pdev = adapter->pdev;
4644 u32 reg, mpc;
4645 u16 phy_tmp;
4646 int i;
4647 u64 bytes, packets;
4648 unsigned int start;
4649 u64 _bytes, _packets;
4651 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4653 /*
4654 * Prevent stats update while adapter is being reset, or if the pci
4655 * connection is down.
4656 */
4657 if (adapter->link_speed == 0)
4658 return;
4659 if (pci_channel_offline(pdev))
4660 return;
4662 bytes = 0;
4663 packets = 0;
4664 for (i = 0; i < adapter->num_rx_queues; i++) {
4665 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4666 struct igb_ring *ring = adapter->rx_ring[i];
4668 ring->rx_stats.drops += rqdpc_tmp;
4669 net_stats->rx_fifo_errors += rqdpc_tmp;
4671 do {
4672 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4673 _bytes = ring->rx_stats.bytes;
4674 _packets = ring->rx_stats.packets;
4675 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4676 bytes += _bytes;
4677 packets += _packets;
4678 }
4680 net_stats->rx_bytes = bytes;
4681 net_stats->rx_packets = packets;
4683 bytes = 0;
4684 packets = 0;
4685 for (i = 0; i < adapter->num_tx_queues; i++) {
4686 struct igb_ring *ring = adapter->tx_ring[i];
4687 do {
4688 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4689 _bytes = ring->tx_stats.bytes;
4690 _packets = ring->tx_stats.packets;
4691 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4692 bytes += _bytes;
4693 packets += _packets;
4694 }
4695 net_stats->tx_bytes = bytes;
4696 net_stats->tx_packets = packets;
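/* The fetch_begin/fetch_retry loops above are the u64_stats seqcount
 * pattern: on 32-bit SMP a 64-bit counter update is not atomic, so the
 * reader retries until it sees a consistent snapshot; on 64-bit kernels
 * these helpers compile down to plain loads. */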
4698 /* read stats registers */
4699 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4700 adapter->stats.gprc += rd32(E1000_GPRC);
4701 adapter->stats.gorc += rd32(E1000_GORCL);
4702 rd32(E1000_GORCH); /* clear GORCL */
4703 adapter->stats.bprc += rd32(E1000_BPRC);
4704 adapter->stats.mprc += rd32(E1000_MPRC);
4705 adapter->stats.roc += rd32(E1000_ROC);
4707 adapter->stats.prc64 += rd32(E1000_PRC64);
4708 adapter->stats.prc127 += rd32(E1000_PRC127);
4709 adapter->stats.prc255 += rd32(E1000_PRC255);
4710 adapter->stats.prc511 += rd32(E1000_PRC511);
4711 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4712 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4713 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4714 adapter->stats.sec += rd32(E1000_SEC);
4716 mpc = rd32(E1000_MPC);
4717 adapter->stats.mpc += mpc;
4718 net_stats->rx_fifo_errors += mpc;
4719 adapter->stats.scc += rd32(E1000_SCC);
4720 adapter->stats.ecol += rd32(E1000_ECOL);
4721 adapter->stats.mcc += rd32(E1000_MCC);
4722 adapter->stats.latecol += rd32(E1000_LATECOL);
4723 adapter->stats.dc += rd32(E1000_DC);
4724 adapter->stats.rlec += rd32(E1000_RLEC);
4725 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4726 adapter->stats.xontxc += rd32(E1000_XONTXC);
4727 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4728 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4729 adapter->stats.fcruc += rd32(E1000_FCRUC);
4730 adapter->stats.gptc += rd32(E1000_GPTC);
4731 adapter->stats.gotc += rd32(E1000_GOTCL);
4732 rd32(E1000_GOTCH); /* clear GOTCL */
4733 adapter->stats.rnbc += rd32(E1000_RNBC);
4734 adapter->stats.ruc += rd32(E1000_RUC);
4735 adapter->stats.rfc += rd32(E1000_RFC);
4736 adapter->stats.rjc += rd32(E1000_RJC);
4737 adapter->stats.tor += rd32(E1000_TORH);
4738 adapter->stats.tot += rd32(E1000_TOTH);
4739 adapter->stats.tpr += rd32(E1000_TPR);
4741 adapter->stats.ptc64 += rd32(E1000_PTC64);
4742 adapter->stats.ptc127 += rd32(E1000_PTC127);
4743 adapter->stats.ptc255 += rd32(E1000_PTC255);
4744 adapter->stats.ptc511 += rd32(E1000_PTC511);
4745 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4746 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4748 adapter->stats.mptc += rd32(E1000_MPTC);
4749 adapter->stats.bptc += rd32(E1000_BPTC);
4751 adapter->stats.tpt += rd32(E1000_TPT);
4752 adapter->stats.colc += rd32(E1000_COLC);
4754 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4755 /* read internal phy specific stats */
4756 reg = rd32(E1000_CTRL_EXT);
4757 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4758 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4759 adapter->stats.tncrs += rd32(E1000_TNCRS);
4762 adapter->stats.tsctc += rd32(E1000_TSCTC);
4763 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4765 adapter->stats.iac += rd32(E1000_IAC);
4766 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4767 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4768 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4769 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4770 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4771 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4772 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4773 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4775 /* Fill out the OS statistics structure */
4776 net_stats->multicast = adapter->stats.mprc;
4777 net_stats->collisions = adapter->stats.colc;
4781 /* RLEC on some newer hardware can be incorrect so build
4782 * our own version based on RUC and ROC */
4783 net_stats->rx_errors = adapter->stats.rxerrc +
4784 adapter->stats.crcerrs + adapter->stats.algnerrc +
4785 adapter->stats.ruc + adapter->stats.roc +
4786 adapter->stats.cexterr;
4787 net_stats->rx_length_errors = adapter->stats.ruc +
4788 adapter->stats.roc;
4789 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4790 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4791 net_stats->rx_missed_errors = adapter->stats.mpc;
4794 net_stats->tx_errors = adapter->stats.ecol +
4795 adapter->stats.latecol;
4796 net_stats->tx_aborted_errors = adapter->stats.ecol;
4797 net_stats->tx_window_errors = adapter->stats.latecol;
4798 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4800 /* Tx Dropped needs to be maintained elsewhere */
4803 if (hw->phy.media_type == e1000_media_type_copper) {
4804 if ((adapter->link_speed == SPEED_1000) &&
4805 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4806 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4807 adapter->phy_stats.idle_errors += phy_tmp;
4811 /* Management Stats */
4812 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4813 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4814 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4817 reg = rd32(E1000_MANC);
4818 if (reg & E1000_MANC_EN_BMC2OS) {
4819 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4820 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4821 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4822 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4826 static irqreturn_t igb_msix_other(int irq, void *data)
4828 struct igb_adapter *adapter = data;
4829 struct e1000_hw *hw = &adapter->hw;
4830 u32 icr = rd32(E1000_ICR);
4831 /* reading ICR causes bit 31 of EICR to be cleared */
4833 if (icr & E1000_ICR_DRSTA)
4834 schedule_work(&adapter->reset_task);
4836 if (icr & E1000_ICR_DOUTSYNC) {
4837 /* HW is reporting DMA is out of sync */
4838 adapter->stats.doosync++;
4839 /* The DMA Out of Sync is also an indication of a spoof event
4840 * in IOV mode. Check the Wrong VM Behavior register to
4841 * see if it is really a spoof event. */
4842 igb_check_wvbr(adapter);
4843 }
4845 /* Check for a mailbox event */
4846 if (icr & E1000_ICR_VMMB)
4847 igb_msg_task(adapter);
4849 if (icr & E1000_ICR_LSC) {
4850 hw->mac.get_link_status = 1;
4851 /* guard against interrupt when we're going down */
4852 if (!test_bit(__IGB_DOWN, &adapter->state))
4853 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4856 wr32(E1000_EIMS, adapter->eims_other);
4858 return IRQ_HANDLED;
4859 }
4861 static void igb_write_itr(struct igb_q_vector *q_vector)
4863 struct igb_adapter *adapter = q_vector->adapter;
4864 u32 itr_val = q_vector->itr_val & 0x7FFC;
4866 if (!q_vector->set_itr)
4867 return;
4869 if (!itr_val)
4870 itr_val = 1;
4872 if (adapter->hw.mac.type == e1000_82575)
4873 itr_val |= itr_val << 16;
4874 else
4875 itr_val |= E1000_EITR_CNT_IGNR;
4877 writel(itr_val, q_vector->itr_register);
4878 q_vector->set_itr = 0;
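/* Editorial note: EITR holds the interrupt throttle interval, which is
 * why itr_val is masked to 0x7FFC above. 82575 mirrors the interval into
 * the upper half-word; later MACs instead take E1000_EITR_CNT_IGNR so
 * the new interval applies without waiting out the current counter. */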
4881 static irqreturn_t igb_msix_ring(int irq, void *data)
4883 struct igb_q_vector *q_vector = data;
4885 /* Write the ITR value calculated from the previous interrupt. */
4886 igb_write_itr(q_vector);
4888 napi_schedule(&q_vector->napi);
4890 return IRQ_HANDLED;
4891 }
4893 #ifdef CONFIG_IGB_DCA
4894 static void igb_update_dca(struct igb_q_vector *q_vector)
4896 struct igb_adapter *adapter = q_vector->adapter;
4897 struct e1000_hw *hw = &adapter->hw;
4898 int cpu = get_cpu();
4900 if (q_vector->cpu == cpu)
4901 goto out_no_update;
4903 if (q_vector->tx.ring) {
4904 int q = q_vector->tx.ring->reg_idx;
4905 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4906 if (hw->mac.type == e1000_82575) {
4907 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4908 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4909 } else {
4910 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4911 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4912 E1000_DCA_TXCTRL_CPUID_SHIFT;
4913 }
4914 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4915 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4916 }
4917 if (q_vector->rx.ring) {
4918 int q = q_vector->rx.ring->reg_idx;
4919 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4920 if (hw->mac.type == e1000_82575) {
4921 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4922 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4923 } else {
4924 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4925 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4926 E1000_DCA_RXCTRL_CPUID_SHIFT;
4927 }
4928 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4929 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4930 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4931 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4933 q_vector->cpu = cpu;
4934 out_no_update:
4935 put_cpu();
4936 }
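/* Direct Cache Access (DCA) tags this ring's DMA writes with the CPU
 * that will consume them so the chipset can steer them toward that
 * CPU's cache. The tag is refreshed lazily: only when the q_vector is
 * observed running on a different CPU than last time. */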
4938 static void igb_setup_dca(struct igb_adapter *adapter)
4940 struct e1000_hw *hw = &adapter->hw;
4942 int i;
4943 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4944 return;
4946 /* Always use CB2 mode, difference is masked in the CB driver. */
4947 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4949 for (i = 0; i < adapter->num_q_vectors; i++) {
4950 adapter->q_vector[i]->cpu = -1;
4951 igb_update_dca(adapter->q_vector[i]);
4955 static int __igb_notify_dca(struct device *dev, void *data)
4957 struct net_device *netdev = dev_get_drvdata(dev);
4958 struct igb_adapter *adapter = netdev_priv(netdev);
4959 struct pci_dev *pdev = adapter->pdev;
4960 struct e1000_hw *hw = &adapter->hw;
4961 unsigned long event = *(unsigned long *)data;
4963 switch (event) {
4964 case DCA_PROVIDER_ADD:
4965 /* if already enabled, don't do it again */
4966 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4967 break;
4968 if (dca_add_requester(dev) == 0) {
4969 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4970 dev_info(&pdev->dev, "DCA enabled\n");
4971 igb_setup_dca(adapter);
4972 break;
4973 }
4974 /* Fall Through since DCA is disabled. */
4975 case DCA_PROVIDER_REMOVE:
4976 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4977 /* without this a class_device is left
4978 * hanging around in the sysfs model */
4979 dca_remove_requester(dev);
4980 dev_info(&pdev->dev, "DCA disabled\n");
4981 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4982 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4983 }
4984 break;
4985 }
4987 return 0;
4988 }
4990 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4991 void *p)
4992 {
4993 int ret_val;
4995 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4996 __igb_notify_dca);
4998 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4999 }
5000 #endif /* CONFIG_IGB_DCA */
5002 #ifdef CONFIG_PCI_IOV
5003 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
5005 unsigned char mac_addr[ETH_ALEN];
5006 struct pci_dev *pdev = adapter->pdev;
5007 struct e1000_hw *hw = &adapter->hw;
5008 struct pci_dev *pvfdev;
5009 unsigned int device_id;
5012 eth_random_addr(mac_addr);
5013 igb_set_vf_mac(adapter, vf, mac_addr);
5015 switch (adapter->hw.mac.type) {
5016 case e1000_82576:
5017 device_id = IGB_82576_VF_DEV_ID;
5018 /* VF Stride for 82576 is 2 */
5019 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
5020 (pdev->devfn & 1);
5021 break;
5022 case e1000_i350:
5023 device_id = IGB_I350_VF_DEV_ID;
5024 /* VF Stride for I350 is 4 */
5025 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5026 (pdev->devfn & 3);
5027 break;
5028 default:
5029 device_id = 0;
5030 thisvf_devfn = 0;
5031 break;
5032 }
5034 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5035 while (pvfdev) {
5036 if (pvfdev->devfn == thisvf_devfn)
5037 break;
5038 pvfdev = pci_get_device(hw->vendor_id,
5039 device_id, pvfdev);
5040 }
5042 if (pvfdev)
5043 adapter->vf_data[vf].vfdev = pvfdev;
5044 else
5045 dev_err(&pdev->dev,
5046 "Couldn't find pci dev ptr for VF %4.4x\n",
5047 thisvf_devfn);
5048 return pvfdev != NULL;
5049 }
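/* Worked devfn example (illustrative only): with the PF at devfn 0 on
 * an 82576, VF n lands at devfn 0x80 + 2 * n, so VF 3 is 0x86; on I350
 * the stride is 4, putting VF 3 at 0x8C. The loop above walks devices
 * with the VF device ID until the computed devfn matches. */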
5051 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5053 struct e1000_hw *hw = &adapter->hw;
5054 struct pci_dev *pdev = adapter->pdev;
5055 struct pci_dev *pvfdev;
5056 u16 vf_devfn = 0;
5057 u16 vf_stride;
5058 unsigned int device_id;
5059 int vfs_found = 0;
5061 switch (adapter->hw.mac.type) {
5062 case e1000_82576:
5063 device_id = IGB_82576_VF_DEV_ID;
5064 /* VF Stride for 82576 is 2 */
5065 vf_stride = 2;
5066 break;
5067 case e1000_i350:
5068 device_id = IGB_I350_VF_DEV_ID;
5069 /* VF Stride for I350 is 4 */
5070 vf_stride = 4;
5071 break;
5072 default:
5073 device_id = 0;
5074 vf_stride = 0;
5075 break;
5076 }
5078 vf_devfn = pdev->devfn + 0x80;
5079 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5080 while (pvfdev) {
5081 if (pvfdev->devfn == vf_devfn &&
5082 (pvfdev->bus->number >= pdev->bus->number))
5083 vfs_found++;
5084 vf_devfn += vf_stride;
5085 pvfdev = pci_get_device(hw->vendor_id,
5086 device_id, pvfdev);
5087 }
5089 return vfs_found;
5090 }
5092 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5093 {
5094 int i;
5095 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5096 if (adapter->vf_data[i].vfdev) {
5097 if (adapter->vf_data[i].vfdev->dev_flags &
5098 PCI_DEV_FLAGS_ASSIGNED)
5099 return true;
5100 }
5101 }
5103 return false;
5104 }
5106 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5108 struct e1000_hw *hw = &adapter->hw;
5109 u32 ping;
5110 int i;
5112 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5113 ping = E1000_PF_CONTROL_MSG;
5114 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5115 ping |= E1000_VT_MSGTYPE_CTS;
5116 igb_write_mbx(hw, &ping, 1, i);
5120 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5122 struct e1000_hw *hw = &adapter->hw;
5123 u32 vmolr = rd32(E1000_VMOLR(vf));
5124 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5126 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5127 IGB_VF_FLAG_MULTI_PROMISC);
5128 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5130 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5131 vmolr |= E1000_VMOLR_MPME;
5132 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5133 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5134 } else {
5135 /*
5136 * if we have hashes and we are clearing a multicast promisc
5137 * flag we need to write the hashes to the MTA as this step
5138 * was previously skipped
5139 */
5140 if (vf_data->num_vf_mc_hashes > 30) {
5141 vmolr |= E1000_VMOLR_MPME;
5142 } else if (vf_data->num_vf_mc_hashes) {
5143 int j;
5144 vmolr |= E1000_VMOLR_ROMPE;
5145 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5146 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5147 }
5148 }
5150 wr32(E1000_VMOLR(vf), vmolr);
5152 /* there are flags left unprocessed, likely not supported */
5153 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5154 return -EINVAL;
5156 return 0;
5157 }
5160 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5161 u32 *msgbuf, u32 vf)
5163 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5164 u16 *hash_list = (u16 *)&msgbuf[1];
5165 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5166 int i;
5168 /* salt away the number of multicast addresses assigned
5169 * to this VF for later use to restore when the PF multicast
5170 * list changes
5171 */
5172 vf_data->num_vf_mc_hashes = n;
5174 /* only up to 30 hash values supported */
5175 if (n > 30)
5176 n = 30;
5178 /* store the hashes for later use */
5179 for (i = 0; i < n; i++)
5180 vf_data->vf_mc_hashes[i] = hash_list[i];
5182 /* Flush and reset the mta with the new values */
5183 igb_set_rx_mode(adapter->netdev);
5185 return 0;
5186 }
5188 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5190 struct e1000_hw *hw = &adapter->hw;
5191 struct vf_data_storage *vf_data;
5192 int i, j;
5194 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5195 u32 vmolr = rd32(E1000_VMOLR(i));
5196 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5198 vf_data = &adapter->vf_data[i];
5200 if ((vf_data->num_vf_mc_hashes > 30) ||
5201 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5202 vmolr |= E1000_VMOLR_MPME;
5203 } else if (vf_data->num_vf_mc_hashes) {
5204 vmolr |= E1000_VMOLR_ROMPE;
5205 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5206 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5207 }
5208 wr32(E1000_VMOLR(i), vmolr);
5209 }
5210 }
5212 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5214 struct e1000_hw *hw = &adapter->hw;
5215 u32 pool_mask, reg, vid;
5216 int i;
5218 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5220 /* Find the vlan filter for this id */
5221 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5222 reg = rd32(E1000_VLVF(i));
5224 /* remove the vf from the pool */
5225 reg &= ~pool_mask;
5227 /* if pool is empty then remove entry from vfta */
5228 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5229 (reg & E1000_VLVF_VLANID_ENABLE)) {
5231 vid = reg & E1000_VLVF_VLANID_MASK;
5232 igb_vfta_set(hw, vid, false);
5235 wr32(E1000_VLVF(i), reg);
5236 }
5238 adapter->vf_data[vf].vlans_enabled = 0;
5239 }
5241 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5243 struct e1000_hw *hw = &adapter->hw;
5244 u32 reg, i;
5246 /* The vlvf table only exists on 82576 hardware and newer */
5247 if (hw->mac.type < e1000_82576)
5248 return -1;
5250 /* we only need to do this if VMDq is enabled */
5251 if (!adapter->vfs_allocated_count)
5252 return -1;
5254 /* Find the vlan filter for this id */
5255 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5256 reg = rd32(E1000_VLVF(i));
5257 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5258 vid == (reg & E1000_VLVF_VLANID_MASK))
5259 break;
5260 }
5262 if (add) {
5263 if (i == E1000_VLVF_ARRAY_SIZE) {
5264 /* Did not find a matching VLAN ID entry that was
5265 * enabled. Search for a free filter entry, i.e.
5266 * one without the enable bit set
5268 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5269 reg = rd32(E1000_VLVF(i));
5270 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5271 break;
5272 }
5273 }
5274 if (i < E1000_VLVF_ARRAY_SIZE) {
5275 /* Found an enabled/available entry */
5276 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5278 /* if !enabled we need to set this up in vfta */
5279 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5280 /* add VID to filter table */
5281 igb_vfta_set(hw, vid, true);
5282 reg |= E1000_VLVF_VLANID_ENABLE;
5283 }
5284 reg &= ~E1000_VLVF_VLANID_MASK;
5285 reg |= vid;
5286 wr32(E1000_VLVF(i), reg);
5288 /* do not modify RLPML for PF devices */
5289 if (vf >= adapter->vfs_allocated_count)
5290 return 0;
5292 if (!adapter->vf_data[vf].vlans_enabled) {
5293 u32 size;
5294 reg = rd32(E1000_VMOLR(vf));
5295 size = reg & E1000_VMOLR_RLPML_MASK;
5296 size += 4;
5297 reg &= ~E1000_VMOLR_RLPML_MASK;
5298 reg |= size;
5299 wr32(E1000_VMOLR(vf), reg);
5300 }
5302 adapter->vf_data[vf].vlans_enabled++;
5303 } else {
5305 if (i < E1000_VLVF_ARRAY_SIZE) {
5306 /* remove vf from the pool */
5307 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5308 /* if pool is empty then remove entry from vfta */
5309 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5310 reg = 0;
5311 igb_vfta_set(hw, vid, false);
5312 }
5313 wr32(E1000_VLVF(i), reg);
5315 /* do not modify RLPML for PF devices */
5316 if (vf >= adapter->vfs_allocated_count)
5317 return 0;
5319 adapter->vf_data[vf].vlans_enabled--;
5320 if (!adapter->vf_data[vf].vlans_enabled) {
5321 u32 size;
5322 reg = rd32(E1000_VMOLR(vf));
5323 size = reg & E1000_VMOLR_RLPML_MASK;
5324 size -= 4;
5325 reg &= ~E1000_VMOLR_RLPML_MASK;
5326 reg |= size;
5327 wr32(E1000_VMOLR(vf), reg);
5328 }
5329 }
5331 return 0;
5332 }
5334 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5336 struct e1000_hw *hw = &adapter->hw;
5338 if (vid)
5339 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5340 else
5341 wr32(E1000_VMVIR(vf), 0);
5342 }
5344 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5345 int vf, u16 vlan, u8 qos)
5346 {
5347 int err = 0;
5348 struct igb_adapter *adapter = netdev_priv(netdev);
5350 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5351 return -EINVAL;
5352 if (vlan || qos) {
5353 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5354 if (err)
5355 goto out;
5356 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5357 igb_set_vmolr(adapter, vf, !vlan);
5358 adapter->vf_data[vf].pf_vlan = vlan;
5359 adapter->vf_data[vf].pf_qos = qos;
5360 dev_info(&adapter->pdev->dev,
5361 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5362 if (test_bit(__IGB_DOWN, &adapter->state)) {
5363 dev_warn(&adapter->pdev->dev,
5364 "The VF VLAN has been set,"
5365 " but the PF device is not up.\n");
5366 dev_warn(&adapter->pdev->dev,
5367 "Bring the PF device up before"
5368 " attempting to use the VF device.\n");
5369 }
5370 } else {
5371 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5372 false, vf);
5373 igb_set_vmvir(adapter, vlan, vf);
5374 igb_set_vmolr(adapter, vf, true);
5375 adapter->vf_data[vf].pf_vlan = 0;
5376 adapter->vf_data[vf].pf_qos = 0;
5377 }
5378 out:
5379 return err;
5380 }
5382 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5384 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5385 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5387 return igb_vlvf_set(adapter, vid, add, vf);
5390 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5392 /* clear flags - except flag that indicates PF has set the MAC */
5393 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5394 adapter->vf_data[vf].last_nack = jiffies;
5396 /* reset offloads to defaults */
5397 igb_set_vmolr(adapter, vf, true);
5399 /* reset vlans for device */
5400 igb_clear_vf_vfta(adapter, vf);
5401 if (adapter->vf_data[vf].pf_vlan)
5402 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5403 adapter->vf_data[vf].pf_vlan,
5404 adapter->vf_data[vf].pf_qos);
5405 else
5406 igb_clear_vf_vfta(adapter, vf);
5408 /* reset multicast table array for vf */
5409 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5411 /* Flush and reset the mta with the new values */
5412 igb_set_rx_mode(adapter->netdev);
5415 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5417 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5419 /* generate a new mac address as we were hotplug removed/added */
5420 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5421 eth_random_addr(vf_mac);
5423 /* process remaining reset events */
5424 igb_vf_reset(adapter, vf);
5427 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5429 struct e1000_hw *hw = &adapter->hw;
5430 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5431 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5432 u32 reg, msgbuf[3];
5433 u8 *addr = (u8 *)(&msgbuf[1]);
5435 /* process all the same items cleared in a function level reset */
5436 igb_vf_reset(adapter, vf);
5438 /* set vf mac address */
5439 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5441 /* enable transmit and receive for vf */
5442 reg = rd32(E1000_VFTE);
5443 wr32(E1000_VFTE, reg | (1 << vf));
5444 reg = rd32(E1000_VFRE);
5445 wr32(E1000_VFRE, reg | (1 << vf));
5447 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5449 /* reply to reset with ack and vf mac address */
5450 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5451 memcpy(addr, vf_mac, 6);
5452 igb_write_mbx(hw, msgbuf, 3, vf);
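/* Editorial note: the reset reply is three 32-bit words -- word 0 is
 * E1000_VF_RESET | E1000_VT_MSGTYPE_ACK and words 1-2 hold the 6-byte
 * MAC address, which is why memcpy() targets &msgbuf[1] and
 * igb_write_mbx() is called with a length of 3. */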
5455 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5456 {
5457 /*
5458 * The VF MAC Address is stored in a packed array of bytes
5459 * starting at the second 32 bit word of the msg array
5460 */
5461 unsigned char *addr = (char *)&msg[1];
5462 int err = -1;
5464 if (is_valid_ether_addr(addr))
5465 err = igb_set_vf_mac(adapter, vf, addr);
5467 return err;
5468 }
5470 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5472 struct e1000_hw *hw = &adapter->hw;
5473 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5474 u32 msg = E1000_VT_MSGTYPE_NACK;
5476 /* if device isn't clear to send it shouldn't be reading either */
5477 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5478 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5479 igb_write_mbx(hw, &msg, 1, vf);
5480 vf_data->last_nack = jiffies;
5484 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5486 struct pci_dev *pdev = adapter->pdev;
5487 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5488 struct e1000_hw *hw = &adapter->hw;
5489 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5492 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5494 if (retval) {
5495 /* if receive failed revoke VF CTS stats and restart init */
5496 dev_err(&pdev->dev, "Error receiving message from VF\n");
5497 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5498 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5499 return;
5500 goto out;
5501 }
5503 /* this is a message we already processed, do nothing */
5504 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5505 return;
5507 /*
5508 * until the vf completes a reset it should not be
5509 * allowed to start any configuration.
5510 */
5512 if (msgbuf[0] == E1000_VF_RESET) {
5513 igb_vf_reset_msg(adapter, vf);
5514 return;
5515 }
5517 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5518 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5519 return;
5520 retval = -1;
5521 goto out;
5522 }
5524 switch ((msgbuf[0] & 0xFFFF)) {
5525 case E1000_VF_SET_MAC_ADDR:
5526 retval = -EINVAL;
5527 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5528 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5529 else
5530 dev_warn(&pdev->dev,
5531 "VF %d attempted to override administratively "
5532 "set MAC address\nReload the VF driver to "
5533 "resume operations\n", vf);
5534 break;
5535 case E1000_VF_SET_PROMISC:
5536 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5537 break;
5538 case E1000_VF_SET_MULTICAST:
5539 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5540 break;
5541 case E1000_VF_SET_LPE:
5542 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5543 break;
5544 case E1000_VF_SET_VLAN:
5545 retval = -1;
5546 if (vf_data->pf_vlan)
5547 dev_warn(&pdev->dev,
5548 "VF %d attempted to override administratively "
5549 "set VLAN tag\nReload the VF driver to "
5550 "resume operations\n", vf);
5551 else
5552 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5553 break;
5554 default:
5555 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5556 retval = -1;
5557 break;
5558 }
5560 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5561 out:
5562 /* notify the VF of the results of what it sent us */
5563 if (retval)
5564 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5565 else
5566 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5568 igb_write_mbx(hw, msgbuf, 1, vf);
5571 static void igb_msg_task(struct igb_adapter *adapter)
5573 struct e1000_hw *hw = &adapter->hw;
5574 u32 vf;
5576 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5577 /* process any reset requests */
5578 if (!igb_check_for_rst(hw, vf))
5579 igb_vf_reset_event(adapter, vf);
5581 /* process any messages pending */
5582 if (!igb_check_for_msg(hw, vf))
5583 igb_rcv_msg_from_vf(adapter, vf);
5585 /* process any acks */
5586 if (!igb_check_for_ack(hw, vf))
5587 igb_rcv_ack_from_vf(adapter, vf);
5592 * igb_set_uta - Set unicast filter table address
5593 * @adapter: board private structure
5595 * The unicast table address is a register array of 32-bit registers.
5596 * The table is meant to be used in a way similar to how the MTA is used
5597 * however due to certain limitations in the hardware it is necessary to
5598 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5599 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5601 static void igb_set_uta(struct igb_adapter *adapter)
5603 struct e1000_hw *hw = &adapter->hw;
5604 int i;
5606 /* The UTA table only exists on 82576 hardware and newer */
5607 if (hw->mac.type < e1000_82576)
5608 return;
5610 /* we only need to do this if VMDq is enabled */
5611 if (!adapter->vfs_allocated_count)
5612 return;
5614 for (i = 0; i < hw->mac.uta_reg_count; i++)
5615 array_wr32(E1000_UTA, i, ~0);
5619 * igb_intr_msi - Interrupt Handler
5620 * @irq: interrupt number
5621 * @data: pointer to a network interface device structure
5623 static irqreturn_t igb_intr_msi(int irq, void *data)
5625 struct igb_adapter *adapter = data;
5626 struct igb_q_vector *q_vector = adapter->q_vector[0];
5627 struct e1000_hw *hw = &adapter->hw;
5628 /* read ICR disables interrupts using IAM */
5629 u32 icr = rd32(E1000_ICR);
5631 igb_write_itr(q_vector);
5633 if (icr & E1000_ICR_DRSTA)
5634 schedule_work(&adapter->reset_task);
5636 if (icr & E1000_ICR_DOUTSYNC) {
5637 /* HW is reporting DMA is out of sync */
5638 adapter->stats.doosync++;
5641 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5642 hw->mac.get_link_status = 1;
5643 if (!test_bit(__IGB_DOWN, &adapter->state))
5644 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5647 napi_schedule(&q_vector->napi);
5649 return IRQ_HANDLED;
5650 }
5653 * igb_intr - Legacy Interrupt Handler
5654 * @irq: interrupt number
5655 * @data: pointer to a network interface device structure
5657 static irqreturn_t igb_intr(int irq, void *data)
5659 struct igb_adapter *adapter = data;
5660 struct igb_q_vector *q_vector = adapter->q_vector[0];
5661 struct e1000_hw *hw = &adapter->hw;
5662 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5663 * need for the IMC write */
5664 u32 icr = rd32(E1000_ICR);
5666 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5667 * not set, then the adapter didn't send an interrupt */
5668 if (!(icr & E1000_ICR_INT_ASSERTED))
5669 return IRQ_NONE;
5671 igb_write_itr(q_vector);
5673 if (icr & E1000_ICR_DRSTA)
5674 schedule_work(&adapter->reset_task);
5676 if (icr & E1000_ICR_DOUTSYNC) {
5677 /* HW is reporting DMA is out of sync */
5678 adapter->stats.doosync++;
5681 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5682 hw->mac.get_link_status = 1;
5683 /* guard against interrupt when we're going down */
5684 if (!test_bit(__IGB_DOWN, &adapter->state))
5685 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5688 napi_schedule(&q_vector->napi);
5690 return IRQ_HANDLED;
5691 }
5693 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5695 struct igb_adapter *adapter = q_vector->adapter;
5696 struct e1000_hw *hw = &adapter->hw;
5698 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5699 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5700 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5701 igb_set_itr(q_vector);
5702 else
5703 igb_update_ring_itr(q_vector);
5704 }
5706 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5707 if (adapter->msix_entries)
5708 wr32(E1000_EIMS, q_vector->eims_value);
5709 else
5710 igb_irq_enable(adapter);
5711 }
5712 }
5714 /**
5715 * igb_poll - NAPI Rx polling callback
5716 * @napi: napi polling structure
5717 * @budget: count of how many packets we should handle
5719 static int igb_poll(struct napi_struct *napi, int budget)
5721 struct igb_q_vector *q_vector = container_of(napi,
5722 struct igb_q_vector,
5723 napi);
5724 bool clean_complete = true;
5726 #ifdef CONFIG_IGB_DCA
5727 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5728 igb_update_dca(q_vector);
5729 #endif
5730 if (q_vector->tx.ring)
5731 clean_complete = igb_clean_tx_irq(q_vector);
5733 if (q_vector->rx.ring)
5734 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5736 /* If all work not completed, return budget and keep polling */
5737 if (!clean_complete)
5738 return budget;
5740 /* If not enough Rx work done, exit the polling mode */
5741 napi_complete(napi);
5742 igb_ring_irq_enable(q_vector);
5744 return 0;
5745 }
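/* NAPI contract recap: returning the full budget keeps this q_vector in
 * polled mode, while finishing under budget runs napi_complete() and
 * re-enables the ring's interrupt via igb_ring_irq_enable() above. */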
5747 #ifdef CONFIG_IGB_PTP
5749 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5750 * @q_vector: pointer to q_vector containing needed info
5751 * @buffer: pointer to igb_tx_buffer structure
5753 * If we were asked to do hardware stamping and such a time stamp is
5754 * available, then it must have been for this skb here because we only
5755 * allow only one such packet into the queue.
5757 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5758 struct igb_tx_buffer *buffer_info)
5760 struct igb_adapter *adapter = q_vector->adapter;
5761 struct e1000_hw *hw = &adapter->hw;
5762 struct skb_shared_hwtstamps shhwtstamps;
5763 u64 regval;
5765 /* if skb does not support hw timestamp or TX stamp not valid exit */
5766 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5767 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5768 return;
5770 regval = rd32(E1000_TXSTMPL);
5771 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5773 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5774 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5779 * igb_clean_tx_irq - Reclaim resources after transmit completes
5780 * @q_vector: pointer to q_vector containing needed info
5782 * returns true if ring is completely cleaned
5784 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5786 struct igb_adapter *adapter = q_vector->adapter;
5787 struct igb_ring *tx_ring = q_vector->tx.ring;
5788 struct igb_tx_buffer *tx_buffer;
5789 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5790 unsigned int total_bytes = 0, total_packets = 0;
5791 unsigned int budget = q_vector->tx.work_limit;
5792 unsigned int i = tx_ring->next_to_clean;
5794 if (test_bit(__IGB_DOWN, &adapter->state))
5795 return true;
5797 tx_buffer = &tx_ring->tx_buffer_info[i];
5798 tx_desc = IGB_TX_DESC(tx_ring, i);
5799 i -= tx_ring->count;
5801 for (; budget; budget--) {
5802 eop_desc = tx_buffer->next_to_watch;
5804 /* prevent any other reads prior to eop_desc */
5805 rmb();
5807 /* if next_to_watch is not set then there is no work pending */
5808 if (!eop_desc)
5809 break;
5811 /* if DD is not set pending work has not been completed */
5812 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5813 break;
5815 /* clear next_to_watch to prevent false hangs */
5816 tx_buffer->next_to_watch = NULL;
5818 /* update the statistics for this packet */
5819 total_bytes += tx_buffer->bytecount;
5820 total_packets += tx_buffer->gso_segs;
5822 #ifdef CONFIG_IGB_PTP
5823 /* retrieve hardware timestamp */
5824 igb_tx_hwtstamp(q_vector, tx_buffer);
5826 #endif
5827 /* free the skb */
5828 dev_kfree_skb_any(tx_buffer->skb);
5829 tx_buffer->skb = NULL;
5831 /* unmap skb header data */
5832 dma_unmap_single(tx_ring->dev,
5833 tx_buffer->dma,
5834 tx_buffer->length,
5835 DMA_TO_DEVICE);
5837 /* clear last DMA location and unmap remaining buffers */
5838 while (tx_desc != eop_desc) {
5839 tx_buffer++;
5840 tx_desc++;
5841 i++;
5844 if (unlikely(!i)) {
5845 i -= tx_ring->count;
5846 tx_buffer = tx_ring->tx_buffer_info;
5847 tx_desc = IGB_TX_DESC(tx_ring, 0);
5850 /* unmap any remaining paged data */
5851 if (tx_buffer->dma) {
5852 dma_unmap_page(tx_ring->dev,
5853 tx_buffer->dma,
5854 tx_buffer->length,
5855 DMA_TO_DEVICE);
5856 }
5859 /* clear last DMA location */
5860 tx_buffer->dma = 0;
5861 }
5862 /* move us one more past the eop_desc for start of next pkt */
5863 tx_buffer++;
5864 tx_desc++;
5865 i++;
5866 if (unlikely(!i)) {
5867 i -= tx_ring->count;
5868 tx_buffer = tx_ring->tx_buffer_info;
5869 tx_desc = IGB_TX_DESC(tx_ring, 0);
5870 }
5871 }
5873 netdev_tx_completed_queue(txring_txq(tx_ring),
5874 total_packets, total_bytes);
5875 i += tx_ring->count;
5876 tx_ring->next_to_clean = i;
5877 u64_stats_update_begin(&tx_ring->tx_syncp);
5878 tx_ring->tx_stats.bytes += total_bytes;
5879 tx_ring->tx_stats.packets += total_packets;
5880 u64_stats_update_end(&tx_ring->tx_syncp);
5881 q_vector->tx.total_bytes += total_bytes;
5882 q_vector->tx.total_packets += total_packets;
5884 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5885 struct e1000_hw *hw = &adapter->hw;
5887 eop_desc = tx_buffer->next_to_watch;
5889 /* Detect a transmit hang in hardware, this serializes the
5890 * check with the clearing of time_stamp and movement of i */
5891 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5892 if (eop_desc &&
5893 time_after(jiffies, tx_buffer->time_stamp +
5894 (adapter->tx_timeout_factor * HZ)) &&
5895 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5897 /* detected Tx unit hang */
5898 dev_err(tx_ring->dev,
5899 "Detected Tx Unit Hang\n"
5903 " next_to_use <%x>\n"
5904 " next_to_clean <%x>\n"
5905 "buffer_info[next_to_clean]\n"
5906 " time_stamp <%lx>\n"
5907 " next_to_watch <%p>\n"
5909 " desc.status <%x>\n",
5910 tx_ring->queue_index,
5911 rd32(E1000_TDH(tx_ring->reg_idx)),
5912 readl(tx_ring->tail),
5913 tx_ring->next_to_use,
5914 tx_ring->next_to_clean,
5915 tx_buffer->time_stamp,
5918 eop_desc->wb.status);
5919 netif_stop_subqueue(tx_ring->netdev,
5920 tx_ring->queue_index);
5922 /* we are about to reset, no point in enabling stuff */
5923 return true;
5924 }
5925 }
5927 if (unlikely(total_packets &&
5928 netif_carrier_ok(tx_ring->netdev) &&
5929 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5930 /* Make sure that anybody stopping the queue after this
5931 * sees the new next_to_clean.
5932 */
5933 smp_mb();
5934 if (__netif_subqueue_stopped(tx_ring->netdev,
5935 tx_ring->queue_index) &&
5936 !(test_bit(__IGB_DOWN, &adapter->state))) {
5937 netif_wake_subqueue(tx_ring->netdev,
5938 tx_ring->queue_index);
5940 u64_stats_update_begin(&tx_ring->tx_syncp);
5941 tx_ring->tx_stats.restart_queue++;
5942 u64_stats_update_end(&tx_ring->tx_syncp);
5943 }
5944 }
5946 return !!budget;
5947 }
5949 static inline void igb_rx_checksum(struct igb_ring *ring,
5950 union e1000_adv_rx_desc *rx_desc,
5951 struct sk_buff *skb)
5953 skb_checksum_none_assert(skb);
5955 /* Ignore Checksum bit is set */
5956 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5957 return;
5959 /* Rx checksum disabled via ethtool */
5960 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5961 return;
5963 /* TCP/UDP checksum error bit is set */
5964 if (igb_test_staterr(rx_desc,
5965 E1000_RXDEXT_STATERR_TCPE |
5966 E1000_RXDEXT_STATERR_IPE)) {
5968 * work around errata with sctp packets where the TCPE aka
5969 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5970 * packets, (aka let the stack check the crc32c)
5972 if (!((skb->len == 60) &&
5973 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5974 u64_stats_update_begin(&ring->rx_syncp);
5975 ring->rx_stats.csum_err++;
5976 u64_stats_update_end(&ring->rx_syncp);
5977 }
5978 /* let the stack verify checksum errors */
5979 return;
5980 }
5981 /* It must be a TCP or UDP packet with a valid checksum */
5982 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5983 E1000_RXD_STAT_UDPCS))
5984 skb->ip_summed = CHECKSUM_UNNECESSARY;
5986 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5987 le32_to_cpu(rx_desc->wb.upper.status_error));
5990 static inline void igb_rx_hash(struct igb_ring *ring,
5991 union e1000_adv_rx_desc *rx_desc,
5992 struct sk_buff *skb)
5994 if (ring->netdev->features & NETIF_F_RXHASH)
5995 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5998 #ifdef CONFIG_IGB_PTP
5999 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
6000 union e1000_adv_rx_desc *rx_desc,
6001 struct sk_buff *skb)
6003 struct igb_adapter *adapter = q_vector->adapter;
6004 struct e1000_hw *hw = &adapter->hw;
6005 u64 regval;
6007 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6008 E1000_RXDADV_STAT_TS))
6009 return;
6011 /*
6012 * If this bit is set, then the RX registers contain the time stamp. No
6013 * other packet will be time stamped until we read these registers, so
6014 * read the registers to make them available again. Because only one
6015 * packet can be time stamped at a time, we know that the register
6016 * values must belong to this one here and therefore we don't need to
6017 * compare any of the additional attributes stored for it.
6019 * If nothing went wrong, then it should have a shared tx_flags that we
6020 * can turn into a skb_shared_hwtstamps.
6021 */
6022 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6023 u32 *stamp = (u32 *)skb->data;
6024 regval = le32_to_cpu(*(stamp + 2));
6025 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6026 skb_pull(skb, IGB_TS_HDR_LEN);
6027 } else {
6028 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6029 return;
6031 regval = rd32(E1000_RXSTMPL);
6032 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
6033 }
6035 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6039 static void igb_rx_vlan(struct igb_ring *ring,
6040 union e1000_adv_rx_desc *rx_desc,
6041 struct sk_buff *skb)
6043 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6044 u16 vid;
6045 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6046 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6047 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6048 else
6049 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6051 __vlan_hwaccel_put_tag(skb, vid);
6052 }
6053 }
6055 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6057 /* HW will not DMA in data larger than the given buffer, even if it
6058 * parses the (NFS, of course) header to be larger. In that case, it
6059 * fills the header buffer and spills the rest into the page.
6061 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6062 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6063 if (hlen > IGB_RX_HDR_LEN)
6064 hlen = IGB_RX_HDR_LEN;
6065 return hlen;
6066 }
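/* Sketch of the assumption above: hardware reports the split-header
 * length in the hdr_info field (selected by E1000_RXDADV_HDRBUFLEN_MASK
 * and right-justified by the shift); the clamp guards against the NIC
 * claiming more header bytes than the IGB_RX_HDR_LEN buffer we mapped. */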
6068 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
6070 struct igb_ring *rx_ring = q_vector->rx.ring;
6071 union e1000_adv_rx_desc *rx_desc;
6072 const int current_node = numa_node_id();
6073 unsigned int total_bytes = 0, total_packets = 0;
6074 u16 cleaned_count = igb_desc_unused(rx_ring);
6075 u16 i = rx_ring->next_to_clean;
6077 rx_desc = IGB_RX_DESC(rx_ring, i);
6079 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
6080 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
6081 struct sk_buff *skb = buffer_info->skb;
6082 union e1000_adv_rx_desc *next_rxd;
6084 buffer_info->skb = NULL;
6085 prefetch(skb->data);
6087 i++;
6088 if (i == rx_ring->count)
6089 i = 0;
6091 next_rxd = IGB_RX_DESC(rx_ring, i);
6092 prefetch(next_rxd);
6094 /*
6095 * This memory barrier is needed to keep us from reading
6096 * any other fields out of the rx_desc until we know the
6097 * RXD_STAT_DD bit is set
6098 */
6100 rmb();
6101 if (!skb_is_nonlinear(skb)) {
6102 __skb_put(skb, igb_get_hlen(rx_desc));
6103 dma_unmap_single(rx_ring->dev, buffer_info->dma,
6104 IGB_RX_HDR_LEN,
6105 DMA_FROM_DEVICE);
6106 buffer_info->dma = 0;
6107 }
6109 if (rx_desc->wb.upper.length) {
6110 u16 length = le16_to_cpu(rx_desc->wb.upper.length);
6112 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
6113 buffer_info->page,
6114 buffer_info->page_offset,
6115 length);
6117 skb->len += length;
6118 skb->data_len += length;
6119 skb->truesize += PAGE_SIZE / 2;
6121 if ((page_count(buffer_info->page) != 1) ||
6122 (page_to_nid(buffer_info->page) != current_node))
6123 buffer_info->page = NULL;
6124 else
6125 get_page(buffer_info->page);
6127 dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
6128 PAGE_SIZE / 2, DMA_FROM_DEVICE);
6129 buffer_info->page_dma = 0;
6130 }
6132 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
6133 struct igb_rx_buffer *next_buffer;
6134 next_buffer = &rx_ring->rx_buffer_info[i];
6135 buffer_info->skb = next_buffer->skb;
6136 buffer_info->dma = next_buffer->dma;
6137 next_buffer->skb = skb;
6138 next_buffer->dma = 0;
6139 goto next_desc;
6140 }
6142 if (unlikely((igb_test_staterr(rx_desc,
6143 E1000_RXDEXT_ERR_FRAME_ERR_MASK))
6144 && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
6145 dev_kfree_skb_any(skb);
6146 goto next_desc;
6147 }
6149 #ifdef CONFIG_IGB_PTP
6150 igb_rx_hwtstamp(q_vector, rx_desc, skb);
6151 #endif
6152 igb_rx_hash(rx_ring, rx_desc, skb);
6153 igb_rx_checksum(rx_ring, rx_desc, skb);
6154 igb_rx_vlan(rx_ring, rx_desc, skb);
6156 total_bytes += skb->len;
6157 total_packets++;
6159 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
6161 napi_gro_receive(&q_vector->napi, skb);
6163 budget--;
6164 next_desc:
6165 if (!budget)
6166 break;
6168 cleaned_count++;
6169 /* return some buffers to hardware, one at a time is too slow */
6170 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
6171 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6172 cleaned_count = 0;
6173 }
6175 /* use prefetched values */
6176 rx_desc = next_rxd;
6177 }
6179 rx_ring->next_to_clean = i;
6180 u64_stats_update_begin(&rx_ring->rx_syncp);
6181 rx_ring->rx_stats.packets += total_packets;
6182 rx_ring->rx_stats.bytes += total_bytes;
6183 u64_stats_update_end(&rx_ring->rx_syncp);
6184 q_vector->rx.total_packets += total_packets;
6185 q_vector->rx.total_bytes += total_bytes;
6187 if (cleaned_count)
6188 igb_alloc_rx_buffers(rx_ring, cleaned_count);
6190 return !!budget;
6191 }
6193 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6194 struct igb_rx_buffer *bi)
6196 struct sk_buff *skb = bi->skb;
6197 dma_addr_t dma = bi->dma;
6199 if (dma)
6200 return true;
6202 if (likely(!skb)) {
6203 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6204 IGB_RX_HDR_LEN);
6205 bi->skb = skb;
6206 if (!skb) {
6207 rx_ring->rx_stats.alloc_failed++;
6208 return false;
6209 }
6211 /* initialize skb for ring */
6212 skb_record_rx_queue(skb, rx_ring->queue_index);
6213 }
6215 dma = dma_map_single(rx_ring->dev, skb->data,
6216 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6218 if (dma_mapping_error(rx_ring->dev, dma)) {
6219 rx_ring->rx_stats.alloc_failed++;
6220 return false;
6221 }
6223 bi->dma = dma;
6224 return true;
6225 }
6227 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6228 struct igb_rx_buffer *bi)
6230 struct page *page = bi->page;
6231 dma_addr_t page_dma = bi->page_dma;
6232 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6234 if (page_dma)
6235 return true;
6237 if (!page) {
6238 page = __skb_alloc_page(GFP_ATOMIC, bi->skb);
6239 bi->page = page;
6240 if (unlikely(!page)) {
6241 rx_ring->rx_stats.alloc_failed++;
6242 return false;
6243 }
6244 }
6246 page_dma = dma_map_page(rx_ring->dev, page,
6247 page_offset, PAGE_SIZE / 2,
6250 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6251 rx_ring->rx_stats.alloc_failed++;
6252 return false;
6253 }
6255 bi->page_dma = page_dma;
6256 bi->page_offset = page_offset;
6257 return true;
6258 }
6260 /**
6261 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6262 * @adapter: address of board private structure
6264 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6266 union e1000_adv_rx_desc *rx_desc;
6267 struct igb_rx_buffer *bi;
6268 u16 i = rx_ring->next_to_use;
6270 rx_desc = IGB_RX_DESC(rx_ring, i);
6271 bi = &rx_ring->rx_buffer_info[i];
6272 i -= rx_ring->count;
6274 while (cleaned_count--) {
6275 if (!igb_alloc_mapped_skb(rx_ring, bi))
6276 break;
6278 /* Refresh the desc even if buffer_addrs didn't change
6279 * because each write-back erases this info. */
6280 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6282 if (!igb_alloc_mapped_page(rx_ring, bi))
6283 break;
6285 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6287 rx_desc++;
6288 bi++;
6289 i++;
6290 if (unlikely(!i)) {
6291 rx_desc = IGB_RX_DESC(rx_ring, 0);
6292 bi = rx_ring->rx_buffer_info;
6293 i -= rx_ring->count;
6294 }
6296 /* clear the hdr_addr for the next_to_use descriptor */
6297 rx_desc->read.hdr_addr = 0;
6300 i += rx_ring->count;
6302 if (rx_ring->next_to_use != i) {
6303 rx_ring->next_to_use = i;
6305 /* Force memory writes to complete before letting h/w
6306 * know there are new descriptors to fetch. (Only
6307 * applicable for weak-ordered memory model archs,
6308 * such as IA-64). */
6310 writel(i, rx_ring->tail);
6320 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6322 struct igb_adapter *adapter = netdev_priv(netdev);
6323 struct mii_ioctl_data *data = if_mii(ifr);
6325 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6326 return -EOPNOTSUPP;
6328 switch (cmd) {
6329 case SIOCGMIIPHY:
6330 data->phy_id = adapter->hw.phy.addr;
6331 break;
6332 case SIOCGMIIREG:
6333 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6334 &data->val_out))
6335 return -EIO;
6336 break;
6337 case SIOCSMIIREG:
6338 default:
6339 return -EOPNOTSUPP;
6340 }
6341 return 0;
6342 }
6344 /**
6345 * igb_hwtstamp_ioctl - control hardware time stamping
6350 * Outgoing time stamping can be enabled and disabled. Play nice and
6351 * disable it when requested, although it shouldn't cause any overhead
6352 * when no packet needs it. At most one packet in the queue may be
6353 * marked for time stamping, otherwise it would be impossible to tell
6354 * for sure to which packet the hardware time stamp belongs.
6356 * Incoming time stamping has to be configured via the hardware
6357 * filters. Not all combinations are supported, in particular event
6358 * type has to be specified. Matching the kind of event packet is
6359 * not supported, with the exception of "all V2 events regardless of
6363 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6364 struct ifreq *ifr, int cmd)
6366 struct igb_adapter *adapter = netdev_priv(netdev);
6367 struct e1000_hw *hw = &adapter->hw;
6368 struct hwtstamp_config config;
6369 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6370 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6371 u32 tsync_rx_cfg = 0;
6372 bool is_l4 = false;
6373 bool is_l2 = false;
6374 u32 regval;
6376 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6377 return -EFAULT;
6379 /* reserved for future extensions */
6380 if (config.flags)
6381 return -EINVAL;
6383 switch (config.tx_type) {
6384 case HWTSTAMP_TX_OFF:
6385 tsync_tx_ctl = 0;
6386 case HWTSTAMP_TX_ON:
6387 break;
6388 default:
6389 return -ERANGE;
6390 }
6392 switch (config.rx_filter) {
6393 case HWTSTAMP_FILTER_NONE:
6394 tsync_rx_ctl = 0;
6395 break;
6396 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6397 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6398 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6399 case HWTSTAMP_FILTER_ALL:
6401 * register TSYNCRXCFG must be set, therefore it is not
6402 * possible to time stamp both Sync and Delay_Req messages
6403 * => fall back to time stamping all packets
6405 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6406 config.rx_filter = HWTSTAMP_FILTER_ALL;
6407 break;
6408 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6409 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6410 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6411 is_l4 = true;
6412 break;
6413 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6414 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6415 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6416 is_l4 = true;
6417 break;
6418 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6419 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6420 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6421 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6422 is_l2 = true;
6423 is_l4 = true;
6424 config.rx_filter = HWTSTAMP_FILTER_SOME;
6425 break;
6426 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6427 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6428 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6429 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6430 is_l2 = true;
6431 is_l4 = true;
6432 config.rx_filter = HWTSTAMP_FILTER_SOME;
6433 break;
6434 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6435 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6436 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6437 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6438 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6439 is_l2 = true;
6440 is_l4 = true;
6441 break;
6442 default:
6443 return -ERANGE;
6444 }
6446 if (hw->mac.type == e1000_82575) {
6447 if (tsync_rx_ctl | tsync_tx_ctl)
6448 return -EINVAL;
6449 }
6452 /*
6453 * Per-packet timestamping only works if all packets are
6454 * timestamped, so enable timestamping in all packets as
6455 * long as one rx filter was configured.
6456 */
6457 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6458 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6459 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6462 /* enable/disable TX */
6463 regval = rd32(E1000_TSYNCTXCTL);
6464 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6465 regval |= tsync_tx_ctl;
6466 wr32(E1000_TSYNCTXCTL, regval);
6468 /* enable/disable RX */
6469 regval = rd32(E1000_TSYNCRXCTL);
6470 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6471 regval |= tsync_rx_ctl;
6472 wr32(E1000_TSYNCRXCTL, regval);
6474 /* define which PTP packets are time stamped */
6475 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6477 /* define ethertype filter for timestamped packets */
6478 if (is_l2)
6479 wr32(E1000_ETQF(3),
6480 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6481 E1000_ETQF_1588 | /* enable timestamping */
6482 ETH_P_1588)); /* 1588 eth protocol type */
6483 else
6484 wr32(E1000_ETQF(3), 0);
6486 #define PTP_PORT 319
6487 /* L4 Queue Filter[3]: filter by destination port and protocol */
6488 if (is_l4) {
6489 u32 ftqf = (IPPROTO_UDP /* UDP */
6490 | E1000_FTQF_VF_BP /* VF not compared */
6491 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6492 | E1000_FTQF_MASK); /* mask all inputs */
6493 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6495 wr32(E1000_IMIR(3), htons(PTP_PORT));
6496 wr32(E1000_IMIREXT(3),
6497 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6498 if (hw->mac.type == e1000_82576) {
6499 /* enable source port check */
6500 wr32(E1000_SPQF(3), htons(PTP_PORT));
6501 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6503 wr32(E1000_FTQF(3), ftqf);
6505 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
	adapter->hwtstamp_config = config;

	/* clear TX/RX time stamp registers, just to be sure */
	regval = rd32(E1000_TXSTMPH);
	regval = rd32(E1000_RXSTMPH);

	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
		-EFAULT : 0;
}
static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSMIIREG:
		return igb_mii_ioctl(netdev, ifr, cmd);
	case SIOCSHWTSTAMP:
		return igb_hwtstamp_ioctl(netdev, ifr, cmd);
	default:
		return -EOPNOTSUPP;
	}
}
s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset = adapter->pdev->pcie_cap;

	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_read_config_word(adapter->pdev, cap_offset + reg, value);

	return 0;
}
s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	u16 cap_offset = adapter->pdev->pcie_cap;

	if (!cap_offset)
		return -E1000_ERR_CONFIG;

	pci_write_config_word(adapter->pdev, cap_offset + reg, *value);

	return 0;
}
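/*
 * Usage sketch (illustrative, assumed caller): reg is an offset within
 * the PCI Express capability structure, so shared MAC code can read a
 * config field such as the Device Control register with
 *
 *	u16 devctl;
 *	igb_read_pcie_cap_reg(hw, PCI_EXP_DEVCTL, &devctl);
 */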
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl;
	bool enable = !!(features & NETIF_F_HW_VLAN_RX);

	if (enable) {
		/* enable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);

		/* Disable CFI check */
		rctl = rd32(E1000_RCTL);
		rctl &= ~E1000_RCTL_CFIEN;
		wr32(E1000_RCTL, rctl);
	} else {
		/* disable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl &= ~E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);
	}

	igb_rlpml_set(adapter);
}
static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;

	/* attempt to add filter to vlvf array */
	igb_vlvf_set(adapter, vid, true, pf_id);

	/* add the filter since PF can receive vlans w/o entry in vlvf */
	igb_vfta_set(hw, vid, true);

	set_bit(vid, adapter->active_vlans);

	return 0;
}
static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;
	s32 err;

	/* remove vlan from VLVF table array */
	err = igb_vlvf_set(adapter, vid, false, pf_id);

	/* if vid was not present in VLVF just remove it from table */
	if (err)
		igb_vfta_set(hw, vid, false);

	clear_bit(vid, adapter->active_vlans);

	return 0;
}
static void igb_restore_vlan(struct igb_adapter *adapter)
{
	u16 vid;

	igb_vlan_mode(adapter->netdev, adapter->netdev->features);

	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
		igb_vlan_rx_add_vid(adapter->netdev, vid);
}
int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_mac_info *mac = &adapter->hw.mac;

	mac->autoneg = 0;

	/* Make sure dplx is at most 1 bit and lsb of speed is not set
	 * for the switch() below to work */
	if ((spd & 1) || (dplx & ~1))
		goto err_inval;
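	/*
	 * Worked example of why the check above suffices: SPEED_10/100/1000
	 * are all even and DUPLEX_HALF/DUPLEX_FULL are 0/1, so every valid
	 * spd + dplx sum in the switch below is unique (e.g. 100 + 1 = 101
	 * can only mean 100 Mbps full duplex).
	 */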
	/* Fiber NICs only allow 1000 Mbps full duplex */
	if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
	    spd != SPEED_1000 &&
	    dplx != DUPLEX_FULL)
		goto err_inval;
	switch (spd + dplx) {
	case SPEED_10 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	case SPEED_10 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_10_FULL;
		break;
	case SPEED_100 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case SPEED_100 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_100_FULL;
		break;
	case SPEED_1000 + DUPLEX_FULL:
		mac->autoneg = 1;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case SPEED_1000 + DUPLEX_HALF: /* not supported */
	default:
		goto err_inval;
	}
	return 0;

err_inval:
	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
	return -EINVAL;
}
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
			  bool runtime)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		__igb_close(netdev, true);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w. If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}
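/*
 * Note: igb_suspend(), igb_runtime_suspend() and igb_shutdown() below all
 * funnel through __igb_shutdown(); the *enable_wake result decides whether
 * the caller arms PME via pci_prepare_to_sleep() or simply drops to D3hot
 * with wake disabled.
 */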
#ifdef CONFIG_PM_SLEEP
static int igb_suspend(struct device *dev)
{
	int retval;
	bool wake;
	struct pci_dev *pdev = to_pci_dev(dev);

	retval = __igb_shutdown(pdev, &wake, 0);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}
#endif /* CONFIG_PM_SLEEP */
static int igb_resume(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
	pci_save_state(pdev);

	err = pci_enable_device_mem(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);

	wr32(E1000_WUS, ~0);

	if (netdev->flags & IFF_UP) {
		err = __igb_open(netdev, true);
		if (err)
			return err;
	}

	netif_device_attach(netdev);

	return 0;
}
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_idle(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (!igb_has_link(adapter))
		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);

	return -EBUSY;
}
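/*
 * Returning -EBUSY keeps the PM core from suspending the device directly
 * from the idle callback; instead, pm_schedule_suspend() above queues a
 * delayed runtime suspend five seconds after the link is seen down.
 */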
static int igb_runtime_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	int retval;
	bool wake;

	retval = __igb_shutdown(pdev, &wake, 1);
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}

static int igb_runtime_resume(struct device *dev)
{
	return igb_resume(dev);
}
#endif /* CONFIG_PM_RUNTIME */
static void igb_shutdown(struct pci_dev *pdev)
{
	bool wake;

	__igb_shutdown(pdev, &wake, 0);

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
}
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct igb_q_vector *q_vector;
	int i;

	for (i = 0; i < adapter->num_q_vectors; i++) {
		q_vector = adapter->q_vector[i];
		if (adapter->msix_entries)
			wr32(E1000_EIMC, q_vector->eims_value);
		else
			igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
	}
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}
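/*
 * The AER core drives the recovery sequence: igb_io_error_detected(),
 * igb_io_slot_reset() and igb_io_resume() are stages of one flow (see
 * Documentation/PCI/pci-error-recovery.txt), so each callback picks up
 * where the previous one left off.
 */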
/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first-half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev,
			"pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
			err);
		/* non-fatal, continue */
	}

	return result;
}
/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second-half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	u32 rar_low, rar_high;
	struct e1000_hw *hw = &adapter->hw;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
		   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	/* 82575 encodes the pool as a multiple; later MACs as a bit shift */
	if (hw->mac.type == e1000_82575)
		rar_high |= E1000_RAH_POOL_1 * qsel;
	else
		rar_high |= E1000_RAH_POOL_1 << qsel;

	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}
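/*
 * Worked example: for MAC 00:11:22:33:44:55 the assignments above yield
 * rar_low = 0x33221100 and the low 16 bits of rar_high = 0x5544, i.e. the
 * address bytes laid out in little-endian register order.
 */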
static int igb_set_vf_mac(struct igb_adapter *adapter,
			  int vf, unsigned char *mac_addr)
{
	struct e1000_hw *hw = &adapter->hw;
	/* VF MAC addresses start at the end of the receive address array
	 * and move towards the first entry, so a collision is not possible */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);

	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

	return 0;
}
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	dev_info(&adapter->pdev->dev,
		 "Reload the VF driver to make this change effective.\n");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev,
			 "The VF MAC address has been set, but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev,
			 "Bring the PF device up before attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}
static int igb_link_mbps(int internal_link_speed)
{
	switch (internal_link_speed) {
	case SPEED_100:
		return 100;
	case SPEED_1000:
		return 1000;
	default:
		return 0;
	}
}
static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
				  int link_speed)
{
	int rf_dec, rf_int;
	u32 bcnrc_val;

	if (tx_rate != 0) {
		/* Calculate the rate factor values to set */
		rf_int = link_speed / tx_rate;
		rf_dec = (link_speed - (rf_int * tx_rate));
		rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;

		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
		bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
			       E1000_RTTBCNRC_RF_INT_MASK);
		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
	} else {
		bcnrc_val = 0;
	}

	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
	/*
	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
	 */
	wr32(E1000_RTTBCNRM, 0x14);
	wr32(E1000_RTTBCNRC, bcnrc_val);
}
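/*
 * Worked example (assuming E1000_RTTBCNRC_RF_INT_SHIFT == 14): with
 * link_speed = 1000 and tx_rate = 300, rf_int = 3 and
 * rf_dec = (1000 - 900) * 2^14 / 300 = 5461, so the programmed rate
 * factor is 3 + 5461/16384 ~= 3.33 = link_speed / tx_rate.
 */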
static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
	int actual_link_speed, i;
	bool reset_rate = false;

	/* VF TX rate limit was not set or not supported */
	if ((adapter->vf_rate_link_speed == 0) ||
	    (adapter->hw.mac.type != e1000_82576))
		return;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if (actual_link_speed != adapter->vf_rate_link_speed) {
		reset_rate = true;
		adapter->vf_rate_link_speed = 0;
		dev_info(&adapter->pdev->dev,
			 "Link speed has been changed. VF Transmit rate is disabled\n");
	}

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (reset_rate)
			adapter->vf_data[i].tx_rate = 0;

		igb_set_vf_rate_limit(&adapter->hw, i,
				      adapter->vf_data[i].tx_rate,
				      actual_link_speed);
	}
}
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
	    (tx_rate < 0) || (tx_rate > actual_link_speed))
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);

	return 0;
}
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;
	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	return 0;
}
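/*
 * These ndo hooks are typically exercised from user space via iproute2
 * (device name illustrative), e.g.
 *	ip link set eth0 vf 0 mac 02:01:02:03:04:05 rate 500
 * to set the VF MAC and TX rate, and "ip link show eth0" to read the
 * configuration back through igb_ndo_get_vf_config().
 */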
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg;

	switch (hw->mac.type) {
	case e1000_82575:
	default:
		/* replication is not supported for 82575 */
		return;
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
		/* fall through */
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
		/* fall through */
	case e1000_i350:
		/* none of the above registers are supported by i350 */
		break;
	}

	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
		igb_vmdq_set_anti_spoofing_pf(hw, true,
					      adapter->vfs_allocated_count);
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
}
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 dmac_thr;
	u16 hwm;

	if (hw->mac.type > e1000_82580) {
		if (adapter->flags & IGB_FLAG_DMAC) {
			u32 reg;

			/* force threshold to 0. */
			wr32(E1000_DMCTXTH, 0);
			/*
			 * DMA Coalescing high water mark needs to be greater
			 * than the Rx threshold. Set hwm to PBA - max frame
			 * size in 16B units, capping it at PBA - 6KB.
			 */
			hwm = 64 * pba - adapter->max_frame_size / 16;
			if (hwm < 64 * (pba - 6))
				hwm = 64 * (pba - 6);
			reg = rd32(E1000_FCRTC);
			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
				& E1000_FCRTC_RTH_COAL_MASK);
			wr32(E1000_FCRTC, reg);
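			/*
			 * Worked example: with a 34KB packet buffer
			 * (pba == 34) and a 1522-byte max frame,
			 * hwm = 64 * 34 - 1522 / 16 = 2081 sixteen-byte
			 * units, above the 64 * (34 - 6) = 1792 floor.
			 */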
			/*
			 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
			 * frame size, capping it at PBA - 10KB.
			 */
			dmac_thr = pba - adapter->max_frame_size / 512;
			if (dmac_thr < pba - 10)
				dmac_thr = pba - 10;
			reg = rd32(E1000_DMACR);
			reg &= ~E1000_DMACR_DMACTHR_MASK;
			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
				& E1000_DMACR_DMACTHR_MASK);

			/* transition to L0x or L1 if available */
			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);

			/* watchdog timer= +-1000 usec in 32usec intervals */
			reg |= (1000 >> 5);

			/* Disable BMC-to-OS Watchdog Enable */
			reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
			wr32(E1000_DMACR, reg);
			/*
			 * no lower threshold to disable
			 * coalescing (smart fifo) - UTRESH=0
			 */
			wr32(E1000_DMCRTRH, 0);

			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);

			wr32(E1000_DMCTLX, reg);

			/*
			 * free space in tx packet buffer to wake from
			 * DMA coalescing
			 */
			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);

			/*
			 * make low power state decision controlled
			 * by DMA coalescing
			 */
			reg = rd32(E1000_PCIEMISC);
			reg &= ~E1000_PCIEMISC_LX_DECISION;
			wr32(E1000_PCIEMISC, reg);
		} /* endif adapter->dmac is not disabled */
	} else if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);

		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
		wr32(E1000_DMACR, 0);
	}
}