kernel - Implement POLLING support for if_igb, change token->lockmgr lock
[dragonfly.git] / sys / dev / netif / e1000 / if_igb.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2010, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33
34
35 #include "opt_polling.h"
36 #include "opt_inet.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #if __FreeBSD_version >= 800000
41 #include <sys/buf_ring.h>
42 #endif
43 #include <sys/bus.h>
44 #include <sys/endian.h>
45 #include <sys/lock.h>
46 #include <sys/kernel.h>
47 #include <sys/kthread.h>
48 #include <sys/malloc.h>
49 #include <sys/mbuf.h>
50 #include <sys/module.h>
51 #include <sys/rman.h>
52 #include <sys/socket.h>
53 #include <sys/sockio.h>
54 #include <sys/sysctl.h>
55 #include <sys/taskqueue.h>
56 #include <sys/eventhandler.h>
57
58 #ifdef IGB_IEEE1588
59 #include <sys/ieee1588.h>
60 #endif
61
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 #include <net/ifq_var.h>
69
70 #include <net/if_types.h>
71 #include <net/vlan/if_vlan_var.h>
72 #include <net/vlan/if_vlan_ether.h>
73
74 #include <netinet/in_systm.h>
75 #include <netinet/in.h>
76 #include <netinet/if_ether.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip6.h>
79 #include <netinet/tcp.h>
80 #ifdef NET_LRO
81 #include <netinet/tcp_lro.h>
82 #endif
83 #include <netinet/udp.h>
84
85 #include <sys/in_cksum.h>
86 #include <bus/pci/pcivar.h>
87 #include <bus/pci/pcireg.h>
88
89 #include "e1000_api.h"
90 #include "e1000_82575.h"
91 #include "if_igb.h"
92 #include "ifcap_defines.h" // XXX
93
94 /*********************************************************************
95  *  Set this to one to display debug statistics
96  *********************************************************************/
97 int     igb_display_debug_stats = 0;
98
99 /*********************************************************************
100  *  Driver version:
101  *********************************************************************/
102 char igb_driver_version[] = "version - 1.9.1";
103
104
105 /*********************************************************************
106  *  PCI Device ID Table
107  *
108  *  Used by probe to select devices to load on
109  *  Last field stores an index into e1000_strings
110  *  Last entry must be all 0s
111  *
112  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113  *********************************************************************/
114
115 static igb_vendor_info_t igb_vendor_info_array[] =
116 {
117         { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
118         { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
123         { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
125         { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
126         { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
127         { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
132         { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
134         { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
136                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
137         /* required last entry */
138         { 0, 0, 0, 0, 0}
139 };
140
141 /*********************************************************************
142  *  Table of branding strings for all supported NICs.
143  *********************************************************************/
144
145 static char *igb_strings[] = {
146         "Intel(R) PRO/1000 Network Connection"
147 };
148
149 /*********************************************************************
150  *  Function prototypes
151  *********************************************************************/
152 static int      igb_probe(device_t);
153 static int      igb_attach(device_t);
154 static int      igb_detach(device_t);
155 static int      igb_shutdown(device_t);
156 static int      igb_suspend(device_t);
157 static int      igb_resume(device_t);
158 static void     igb_start(struct ifnet *);
159 static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
160 #if __FreeBSD_version >= 800000
161 static int      igb_mq_start(struct ifnet *, struct mbuf *);
162 static int      igb_mq_start_locked(struct ifnet *,
163                     struct tx_ring *, struct mbuf *);
164 static void     igb_qflush(struct ifnet *);
165 #endif
166 static int      igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
167 static void     igb_init(void *);
168 static void     igb_init_locked(struct adapter *);
169 static void     igb_stop(void *);
170 static void     igb_media_status(struct ifnet *, struct ifmediareq *);
171 static int      igb_media_change(struct ifnet *);
172 static void     igb_identify_hardware(struct adapter *);
173 static int      igb_allocate_pci_resources(struct adapter *);
174 static int      igb_allocate_msix(struct adapter *);
175 static int      igb_allocate_legacy(struct adapter *);
176 static int      igb_setup_msix(struct adapter *);
177 static void     igb_free_pci_resources(struct adapter *);
178 static void     igb_local_timer(void *);
179 static void     igb_reset(struct adapter *);
180 static void     igb_setup_interface(device_t, struct adapter *);
181 static int      igb_allocate_queues(struct adapter *);
182 static void     igb_configure_queues(struct adapter *);
183
184 static int      igb_allocate_transmit_buffers(struct tx_ring *);
185 static void     igb_setup_transmit_structures(struct adapter *);
186 static void     igb_setup_transmit_ring(struct tx_ring *);
187 static void     igb_initialize_transmit_units(struct adapter *);
188 static void     igb_free_transmit_structures(struct adapter *);
189 static void     igb_free_transmit_buffers(struct tx_ring *);
190
191 static int      igb_allocate_receive_buffers(struct rx_ring *);
192 static int      igb_setup_receive_structures(struct adapter *);
193 static int      igb_setup_receive_ring(struct rx_ring *);
194 static void     igb_initialize_receive_units(struct adapter *);
195 static void     igb_free_receive_structures(struct adapter *);
196 static void     igb_free_receive_buffers(struct rx_ring *);
197 static void     igb_free_receive_ring(struct rx_ring *);
198
199 static void     igb_enable_intr(struct adapter *);
200 static void     igb_disable_intr(struct adapter *);
201 static void     igb_update_stats_counters(struct adapter *);
202 static bool     igb_txeof(struct tx_ring *);
203
204 static __inline void igb_rx_discard(struct rx_ring *,
205                     union e1000_adv_rx_desc *, int);
206 static __inline void igb_rx_input(struct rx_ring *,
207                     struct ifnet *, struct mbuf *, u32);
208
209 static bool     igb_rxeof(struct rx_ring *, int);
210 static void     igb_rx_checksum(u32, struct mbuf *, u32);
211 static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
212 #ifdef NET_TSO
213 static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
214 #endif
215 static void     igb_set_promisc(struct adapter *);
216 static void     igb_disable_promisc(struct adapter *);
217 static void     igb_set_multi(struct adapter *);
218 static void     igb_print_hw_stats(struct adapter *);
219 static void     igb_update_link_status(struct adapter *);
220 static int      igb_get_buf(struct rx_ring *, int, u8);
221
222 static void     igb_register_vlan(void *, struct ifnet *, u16);
223 static void     igb_unregister_vlan(void *, struct ifnet *, u16);
224 static void     igb_setup_vlan_hw_support(struct adapter *);
225
226 static int      igb_xmit(struct tx_ring *, struct mbuf **);
227 static int      igb_dma_malloc(struct adapter *, bus_size_t,
228                     struct igb_dma_alloc *, int);
229 static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
230 static void     igb_print_debug_info(struct adapter *);
231 static void     igb_print_nvm_info(struct adapter *);
232 static int      igb_is_valid_ether_addr(u8 *);
233 static int      igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
234 static int      igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
235 /* Management and WOL Support */
236 static void     igb_init_manageability(struct adapter *);
237 static void     igb_release_manageability(struct adapter *);
238 static void     igb_get_hw_control(struct adapter *);
239 static void     igb_release_hw_control(struct adapter *);
240 static void     igb_enable_wakeup(device_t);
241
242 static void     igb_irq_fast(void *);
243 static void     igb_add_rx_process_limit(struct adapter *, const char *,
244                     const char *, int *, int);
245 static void     igb_handle_rxtx(void *context, int pending);
246 static void     igb_handle_que(void *context, int pending);
247 static void     igb_handle_link(void *context, int pending);
248
249 /* These are MSIX only irq handlers */
250 static void     igb_msix_que(void *);
251 static void     igb_msix_link(void *);
252
253 #ifdef DEVICE_POLLING
254 static poll_handler_t igb_poll;
255 #endif /* DEVICE_POLLING */
256
257 /*********************************************************************
258  *  FreeBSD Device Interface Entry Points
259  *********************************************************************/
260
261 static device_method_t igb_methods[] = {
262         /* Device interface */
263         DEVMETHOD(device_probe, igb_probe),
264         DEVMETHOD(device_attach, igb_attach),
265         DEVMETHOD(device_detach, igb_detach),
266         DEVMETHOD(device_shutdown, igb_shutdown),
267         DEVMETHOD(device_suspend, igb_suspend),
268         DEVMETHOD(device_resume, igb_resume),
269         {0, 0}
270 };
271
272 static driver_t igb_driver = {
273         "igb", igb_methods, sizeof(struct adapter),
274 };
275
276 static devclass_t igb_devclass;
277 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
278 MODULE_DEPEND(igb, pci, 1, 1, 1);
279 MODULE_DEPEND(igb, ether, 1, 1, 1);
280
281 /*********************************************************************
282  *  Tunable default values.
283  *********************************************************************/
284
285 /* Descriptor defaults */
286 static int igb_rxd = IGB_DEFAULT_RXD;
287 static int igb_txd = IGB_DEFAULT_TXD;
288 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
289 TUNABLE_INT("hw.igb.txd", &igb_txd);
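/*
 * A minimal usage sketch: these knobs are boot-time tunables, so a
 * hypothetical /boot/loader.conf could size both rings up from the
 * defaults, e.g.:
 *
 *	hw.igb.rxd=2048
 *	hw.igb.txd=2048
 *
 * igb_attach() below falls back to IGB_DEFAULT_RXD/TXD if a value is
 * out of the MIN/MAX range or breaks the IGB_DBA_ALIGN multiple rule.
 */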
290
291 /*
292 ** AIM: Adaptive Interrupt Moderation,
293 ** which means that the interrupt rate
294 ** is varied over time based on the
295 ** traffic seen on that interrupt vector.
296 */
297 static int igb_enable_aim = TRUE;
298 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
299
300 /*
301  * MSI-X can give the best performance, but it is
302  * disabled by default here; set this to force it on.
303  */
304 static int igb_enable_msix = 0;
305 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
306
307 /*
308  * Header split has seemed to be beneficial in
309  * many of the circumstances tested; however, there
310  * have been some stability issues, so the default
311  * is off.
312  */
313 static int igb_header_split = FALSE;
314 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
315
316 /*
317 ** This will autoconfigure based on
318 ** the number of CPUs if left at 0.
319 */
320 static int igb_num_queues = 0;
321 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
322
323 /* How many packets rxeof tries to clean at a time */
324 static int igb_rx_process_limit = 100;
325 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
326
327 /* Flow control setting - default to FULL */
328 static int igb_fc_setting = e1000_fc_full;
329 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
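/*
 * For reference (a sketch based on the e1000 shared code's flow
 * control enum; values assumed from e1000_hw.h), a loader.conf line
 * such as hw.igb.fc_setting=0 would disable pause frames entirely:
 *
 *	e1000_fc_none     = 0	no flow control
 *	e1000_fc_rx_pause = 1	honor received pause frames only
 *	e1000_fc_tx_pause = 2	send pause frames only
 *	e1000_fc_full     = 3	pause frames in both directions
 */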
330
331 /*
332 ** Shadow VFTA table; this is needed because
333 ** the real filter table gets cleared during
334 ** a soft reset and the driver needs to be able
335 ** to repopulate it.
336 */
337 static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
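/*
 * Sketch of the usual e1000 VFTA addressing, for orientation: the
 * table is IGB_VFTA_SIZE (128) 32-bit words covering all 4096 VLAN
 * IDs, one bit per ID, so a tag maps in as:
 *
 *	index = (vtag >> 5) & 0x7F;	which 32-bit word
 *	bit   = vtag & 0x1F;		which bit within that word
 *	igb_shadow_vfta[index] |= (1 << bit);
 */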
338
339
340 /*********************************************************************
341  *  Device identification routine
342  *
343  *  igb_probe determines if the driver should be loaded for
344  *  an adapter, based on its PCI vendor/device ID.
345  *
346  *  return BUS_PROBE_DEFAULT on success, positive on failure
347  *********************************************************************/
348
349 static int
350 igb_probe(device_t dev)
351 {
352         char            adapter_name[60];
353         uint16_t        pci_vendor_id = 0;
354         uint16_t        pci_device_id = 0;
355         uint16_t        pci_subvendor_id = 0;
356         uint16_t        pci_subdevice_id = 0;
357         igb_vendor_info_t *ent;
358
359         INIT_DEBUGOUT("igb_probe: begin");
360
361         pci_vendor_id = pci_get_vendor(dev);
362         if (pci_vendor_id != IGB_VENDOR_ID)
363                 return (ENXIO);
364
365         pci_device_id = pci_get_device(dev);
366         pci_subvendor_id = pci_get_subvendor(dev);
367         pci_subdevice_id = pci_get_subdevice(dev);
368
369         ent = igb_vendor_info_array;
370         while (ent->vendor_id != 0) {
371                 if ((pci_vendor_id == ent->vendor_id) &&
372                     (pci_device_id == ent->device_id) &&
373
374                     ((pci_subvendor_id == ent->subvendor_id) ||
375                     (ent->subvendor_id == PCI_ANY_ID)) &&
376
377                     ((pci_subdevice_id == ent->subdevice_id) ||
378                     (ent->subdevice_id == PCI_ANY_ID))) {
379                         ksprintf(adapter_name, "%s %s",
380                                 igb_strings[ent->index],
381                                 igb_driver_version);
382                         device_set_desc_copy(dev, adapter_name);
383                         return (BUS_PROBE_DEFAULT);
384                 }
385                 ent++;
386         }
387
388         return (ENXIO);
389 }
390
391 /*********************************************************************
392  *  Device initialization routine
393  *
394  *  The attach entry point is called when the driver is being loaded.
395  *  This routine identifies the type of hardware, allocates all resources
396  *  and initializes the hardware.
397  *
398  *  return 0 on success, positive on failure
399  *********************************************************************/
400
401 static int
402 igb_attach(device_t dev)
403 {
404         struct adapter  *adapter;
405         int             error = 0;
406         u16             eeprom_data;
407
408         INIT_DEBUGOUT("igb_attach: begin");
409
410         adapter = device_get_softc(dev);
411         adapter->dev = adapter->osdep.dev = dev;
412         IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
413
414         /* SYSCTL stuff */
415         sysctl_ctx_init(&adapter->sysctl_ctx);
416         adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
417                                         SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
418                                         device_get_nameunit(adapter->dev),
419                                         CTLFLAG_RD, 0, "");
420         if (adapter->sysctl_tree == NULL) {
421                 device_printf(adapter->dev, "can't add sysctl node\n");
422                 error = ENOMEM;
423                 goto err_sysctl;
424         }
425
426         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
427             SYSCTL_CHILDREN(adapter->sysctl_tree),
428             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
429             igb_sysctl_debug_info, "I", "Debug Information");
430
431         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
432             SYSCTL_CHILDREN(adapter->sysctl_tree),
433             OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
434             igb_sysctl_stats, "I", "Statistics");
435
436         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
437             SYSCTL_CHILDREN(adapter->sysctl_tree),
438             OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
439             &igb_fc_setting, 0, "Flow Control");
440
441         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
442             SYSCTL_CHILDREN(adapter->sysctl_tree),
443             OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
444             &igb_enable_aim, 1, "Interrupt Moderation");
445
446         callout_init(&adapter->timer);
447
448         /* Determine hardware and mac info */
449         igb_identify_hardware(adapter);
450
451         /* Setup PCI resources */
452         if (igb_allocate_pci_resources(adapter)) {
453                 device_printf(dev, "Allocation of PCI resources failed\n");
454                 error = ENXIO;
455                 goto err_pci;
456         }
457
458         /* Do Shared Code initialization */
459         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
460                 device_printf(dev, "Setup of Shared code failed\n");
461                 error = ENXIO;
462                 goto err_pci;
463         }
464
465         e1000_get_bus_info(&adapter->hw);
466
467         /* Sysctls for limiting the amount of work done in the taskqueue */
468         igb_add_rx_process_limit(adapter, "rx_processing_limit",
469             "max number of rx packets to process", &adapter->rx_process_limit,
470             igb_rx_process_limit);
471
472         /*
473          * Validate number of transmit and receive descriptors. It
474          * must not exceed hardware maximum, and must be multiple
475          * of E1000_DBA_ALIGN.
476          */
477         if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
478             (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
479                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
480                     IGB_DEFAULT_TXD, igb_txd);
481                 adapter->num_tx_desc = IGB_DEFAULT_TXD;
482         } else
483                 adapter->num_tx_desc = igb_txd;
484         if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
485             (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
486                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
487                     IGB_DEFAULT_RXD, igb_rxd);
488                 adapter->num_rx_desc = IGB_DEFAULT_RXD;
489         } else
490                 adapter->num_rx_desc = igb_rxd;
491
492         adapter->hw.mac.autoneg = DO_AUTO_NEG;
493         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
494         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
495
496         /* Copper options */
497         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
498                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
499                 adapter->hw.phy.disable_polarity_correction = FALSE;
500                 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
501         }
502
503         /*
504          * Set the frame limits assuming
505          * standard ethernet sized frames.
506          */
507         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
508         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
509
510         /*
511         ** Allocate and Setup Queues
512         */
513         if (igb_allocate_queues(adapter)) {
514                 error = ENOMEM;
515                 goto err_pci;
516         }
517
518         /*
519         ** Start from a known state; this is
520         ** important for reading the NVM and
521         ** MAC address from it.
522         */
523         e1000_reset_hw(&adapter->hw);
524
525         /* Make sure we have a good EEPROM before we read from it */
526         if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
527                 /*
528                 ** Some PCI-E parts fail the first check due to
529                 ** the link being in a sleep state; call it again,
530                 ** and if it fails a second time it's a real issue.
531                 */
532                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
533                         device_printf(dev,
534                             "The EEPROM Checksum Is Not Valid\n");
535                         error = EIO;
536                         goto err_late;
537                 }
538         }
539
540         /*
541         ** Copy the permanent MAC address out of the EEPROM
542         */
543         if (e1000_read_mac_addr(&adapter->hw) < 0) {
544                 device_printf(dev, "EEPROM read error while reading MAC"
545                     " address\n");
546                 error = EIO;
547                 goto err_late;
548         }
549         /* Check its sanity */
550         if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
551                 device_printf(dev, "Invalid MAC address\n");
552                 error = EIO;
553                 goto err_late;
554         }
555
556         /* 
557         ** Configure Interrupts
558         */
559         if ((adapter->msix > 1) && (igb_enable_msix))
560                 error = igb_allocate_msix(adapter);
561         else /* MSI or Legacy */
562                 error = igb_allocate_legacy(adapter);
563         if (error)
564                 goto err_late;
565
566         /* Setup OS specific network interface */
567         igb_setup_interface(dev, adapter);
568
569         /* Now get a good starting state */
570         igb_reset(adapter);
571
572         /* Initialize statistics */
573         igb_update_stats_counters(adapter);
574
575         adapter->hw.mac.get_link_status = 1;
576         igb_update_link_status(adapter);
577
578         /* Indicate SOL/IDER usage */
579         if (e1000_check_reset_block(&adapter->hw))
580                 device_printf(dev,
581                     "PHY reset is blocked due to SOL/IDER session.\n");
582
583         /* Determine if we have to control management hardware */
584         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
585
586         /*
587          * Setup Wake-on-Lan
588          */
589         /* APME bit in EEPROM is mapped to WUC.APME */
590         eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
591         if (eeprom_data)
592                 adapter->wol = E1000_WUFC_MAG;
593
594         /* Register for VLAN events */
595         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
596              igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
597         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
598              igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
599
600         /* Tell the stack that the interface is not active */
601         adapter->ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
602
603         INIT_DEBUGOUT("igb_attach: end");
604
605         return (0);
606
607 err_late:
608         igb_free_transmit_structures(adapter);
609         igb_free_receive_structures(adapter);
610         igb_release_hw_control(adapter);
611 err_pci:
612         igb_free_pci_resources(adapter);
613 err_sysctl:
614         sysctl_ctx_free(&adapter->sysctl_ctx);
615         IGB_CORE_LOCK_DESTROY(adapter);
616
617         return (error);
618 }
619
620 /*********************************************************************
621  *  Device removal routine
622  *
623  *  The detach entry point is called when the driver is being removed.
624  *  This routine stops the adapter and deallocates all the resources
625  *  that were allocated for driver operation.
626  *
627  *  return 0 on success, positive on failure
628  *********************************************************************/
629
630 static int
631 igb_detach(device_t dev)
632 {
633         struct adapter  *adapter = device_get_softc(dev);
634
635         INIT_DEBUGOUT("igb_detach: begin");
636
637         /* Make sure VLANS are not using driver */
638         if (adapter->ifp->if_vlantrunks != NULL) {
639                 device_printf(dev,"Vlan in use, detach first\n");
640                 return (EBUSY);
641         }
642
643         IGB_CORE_LOCK(adapter);
644         adapter->in_detach = 1;
645         igb_stop(adapter);
646         IGB_CORE_UNLOCK(adapter);
647
648         e1000_phy_hw_reset(&adapter->hw);
649
650         /* Give control back to firmware */
651         igb_release_manageability(adapter);
652         igb_release_hw_control(adapter);
653
654         if (adapter->wol) {
655                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
656                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
657                 igb_enable_wakeup(dev);
658         }
659
660         /* Unregister VLAN events */
661         if (adapter->vlan_attach != NULL)
662                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
663         if (adapter->vlan_detach != NULL)
664                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
665
666         ether_ifdetach(adapter->ifp);
667
668         //callout_drain(&adapter->timer);
669         callout_stop(&adapter->timer);
670
671         igb_free_pci_resources(adapter);
672         bus_generic_detach(dev);
673
674         igb_free_transmit_structures(adapter);
675         igb_free_receive_structures(adapter);
676
677         sysctl_ctx_free(&adapter->sysctl_ctx);
678         IGB_CORE_LOCK_DESTROY(adapter);
679
680         return (0);
681 }
682
683 /*********************************************************************
684  *
685  *  Shutdown entry point
686  *
687  **********************************************************************/
688
689 static int
690 igb_shutdown(device_t dev)
691 {
692         return igb_suspend(dev);
693 }
694
695 /*
696  * Suspend/resume device methods.
697  */
698 static int
699 igb_suspend(device_t dev)
700 {
701         struct adapter *adapter = device_get_softc(dev);
702
703         IGB_CORE_LOCK(adapter);
704
705         igb_stop(adapter);
706
707         igb_release_manageability(adapter);
708         igb_release_hw_control(adapter);
709
710         if (adapter->wol) {
711                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
712                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
713                 igb_enable_wakeup(dev);
714         }
715
716         IGB_CORE_UNLOCK(adapter);
717
718         return bus_generic_suspend(dev);
719 }
720
721 static int
722 igb_resume(device_t dev)
723 {
724         struct adapter *adapter = device_get_softc(dev);
725         struct ifnet *ifp = adapter->ifp;
726
727         IGB_CORE_LOCK(adapter);
728         igb_init_locked(adapter);
729         igb_init_manageability(adapter);
730
731         if ((ifp->if_flags & IFF_UP) &&
732             (ifp->if_flags & IFF_RUNNING))
733                 igb_start(ifp);
734
735         IGB_CORE_UNLOCK(adapter);
736
737         return bus_generic_resume(dev);
738 }
739
740
741 /*********************************************************************
742  *  Transmit entry point
743  *
744  *  igb_start is called by the stack to initiate a transmit.
745  *  The driver will remain in this routine as long as there are
746  *  packets to transmit and transmit resources are available.
747  *  In case resources are not available, the stack is notified
748  *  and the packet is requeued.
749  **********************************************************************/
750
751 static void
752 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
753 {
754         struct adapter  *adapter = ifp->if_softc;
755         struct mbuf     *m_head;
756
757         IGB_TX_LOCK_ASSERT(txr);
758
759         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
760             IFF_RUNNING)
761                 return;
762         if (!adapter->link_active)
763                 return;
764
765         while (!ifq_is_empty(&ifp->if_snd)) {
766
767                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
768                 if (m_head == NULL)
769                         break;
770                 /*
771                  *  Encapsulation can modify our pointer, and or make it
772                  *  NULL on failure.  In that event, we can't requeue.
773                  */
774                 if (igb_xmit(txr, &m_head)) {
775                         if (m_head == NULL)
776                                 break;
777                         ifp->if_flags |= IFF_OACTIVE;
778                         ifq_prepend(&ifp->if_snd, m_head);
779                         break;
780                 }
781
782                 /* Send a copy of the frame to the BPF listener */
783                 ETHER_BPF_MTAP(ifp, m_head);
784
785                 /* Set watchdog on */
786                 txr->watchdog_check = TRUE;
787         }
788 }
789  
790 /*
791  * Legacy TX driver routine, called from the
792  * stack, always uses tx[0], and spins for it.
793  * Should not be used with multiqueue tx
794  */
795 static void
796 igb_start(struct ifnet *ifp)
797 {
798         struct adapter  *adapter = ifp->if_softc;
799         struct tx_ring  *txr = adapter->tx_rings;
800
801         if (ifp->if_flags & IFF_RUNNING) {
802                 IGB_TX_LOCK(txr);
803                 igb_start_locked(txr, ifp);
804                 IGB_TX_UNLOCK(txr);
805         }
806         return;
807 }
808
809 #if __FreeBSD_version >= 800000
810 /*
811 ** Multiqueue Transmit driver
812 **
813 */
814 static int
815 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
816 {
817         struct adapter  *adapter = ifp->if_softc;
818         struct tx_ring  *txr;
819         int             i = 0, err = 0;
820
821         /* Which queue to use */
822         if ((m->m_flags & M_FLOWID) != 0)
823                 i = m->m_pkthdr.flowid % adapter->num_queues;
824         txr = &adapter->tx_rings[i];
825
826         if (IGB_TX_TRYLOCK(txr)) {
827                 err = igb_mq_start_locked(ifp, txr, m);
828                 IGB_TX_UNLOCK(txr);
829         } else
830                 err = drbr_enqueue(ifp, txr->br, m);
831
832         return (err);
833 }
834
835 static int
836 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
837 {
838         struct adapter  *adapter = txr->adapter;
839         struct mbuf     *next;
840         int             err = 0, enq;
841
842         IGB_TX_LOCK_ASSERT(txr);
843
844         if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) !=
845             IFF_RUNNING || adapter->link_active == 0) {
846                 if (m != NULL)
847                         err = drbr_enqueue(ifp, txr->br, m);
848                 return (err);
849         }
850
851         enq = 0;
852         if (m == NULL) {
853                 next = drbr_dequeue(ifp, txr->br);
854         } else if (drbr_needs_enqueue(ifp, txr->br)) {
855                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
856                         return (err);
857                 next = drbr_dequeue(ifp, txr->br);
858         } else
859                 next = m;
860         /* Process the queue */
861         while (next != NULL) {
862                 if ((err = igb_xmit(txr, &next)) != 0) {
863                         if (next != NULL)
864                                 err = drbr_enqueue(ifp, txr->br, next);
865                         break;
866                 }
867                 enq++;
868                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
869                 ETHER_BPF_MTAP(ifp, next);
870                 if ((ifp->if_flags & IFF_RUNNING) == 0)
871                         break;
872                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
873                         ifp->if_flags |= IFF_OACTIVE;
874                         break;
875                 }
876                 next = drbr_dequeue(ifp, txr->br);
877         }
878         if (enq > 0) {
879                 /* Set the watchdog */
880                 txr->watchdog_check = TRUE;
881         }
882         return (err);
883 }
884
885 /*
886 ** Flush all ring buffers
887 */
888 static void
889 igb_qflush(struct ifnet *ifp)
890 {
891         struct adapter  *adapter = ifp->if_softc;
892         struct tx_ring  *txr = adapter->tx_rings;
893         struct mbuf     *m;
894
895         for (int i = 0; i < adapter->num_queues; i++, txr++) {
896                 IGB_TX_LOCK(txr);
897                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
898                         m_freem(m);
899                 IGB_TX_UNLOCK(txr);
900         }
901         if_qflush(ifp);
902 }
903 #endif /* __FreeBSD_version >= 800000 */
904
905 /*********************************************************************
906  *  Ioctl entry point
907  *
908  *  igb_ioctl is called when the user wants to configure the
909  *  interface.
910  *
911  *  return 0 on success, positive on failure
912  **********************************************************************/
913
914 static int
915 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cred)
916 {
917         struct adapter  *adapter = ifp->if_softc;
918         struct ifreq *ifr = (struct ifreq *)data;
919 #ifdef INET
920         struct ifaddr *ifa = (struct ifaddr *)data;
921 #endif
922         int error = 0;
923
924         if (adapter->in_detach)
925                 return (error);
926
927         switch (command) {
928         case SIOCSIFADDR:
929 #ifdef INET
930                 if (ifa->ifa_addr->sa_family == AF_INET) {
931                         /*
932                          * XXX
933                          * Since resetting hardware takes a very long time
934                          * and results in link renegotiation, we
935                          * initialize the hardware only when it is
936                          * absolutely required.
937                          */
938                         ifp->if_flags |= IFF_UP;
939                         if (!(ifp->if_flags & IFF_RUNNING)) {
940                                 IGB_CORE_LOCK(adapter);
941                                 igb_init_locked(adapter);
942                                 IGB_CORE_UNLOCK(adapter);
943                         }
944                         if (!(ifp->if_flags & IFF_NOARP))
945                                 arp_ifinit(ifp, ifa);
946                 } else
947 #endif
948                         error = ether_ioctl(ifp, command, data);
949                 break;
950         case SIOCSIFMTU:
951             {
952                 int max_frame_size;
953
954                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
955
956                 IGB_CORE_LOCK(adapter);
957                 max_frame_size = 9234;
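                /*
                 * 9234 bytes of frame leave room for an MTU of up to
                 * 9216 once ETHER_HDR_LEN (14) and ETHER_CRC_LEN (4)
                 * are subtracted.
                 */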
958                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
959                     ETHER_CRC_LEN) {
960                         IGB_CORE_UNLOCK(adapter);
961                         error = EINVAL;
962                         break;
963                 }
964
965                 ifp->if_mtu = ifr->ifr_mtu;
966                 adapter->max_frame_size =
967                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
968                 igb_init_locked(adapter);
969                 IGB_CORE_UNLOCK(adapter);
970                 break;
971             }
972         case SIOCSIFFLAGS:
973                 IOCTL_DEBUGOUT(
974                     "ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
975                 IGB_CORE_LOCK(adapter);
976                 if (ifp->if_flags & IFF_UP) {
977                         if ((ifp->if_flags & IFF_RUNNING)) {
978                                 if ((ifp->if_flags ^ adapter->if_flags) &
979                                     (IFF_PROMISC | IFF_ALLMULTI)) {
980                                         igb_disable_promisc(adapter);
981                                         igb_set_promisc(adapter);
982                                 }
983                         } else
984                                 igb_init_locked(adapter);
985                 } else
986                         if (ifp->if_flags & IFF_RUNNING)
987                                 igb_stop(adapter); 
988                 adapter->if_flags = ifp->if_flags;
989                 IGB_CORE_UNLOCK(adapter);
990                 break;
991         case SIOCADDMULTI:
992         case SIOCDELMULTI:
993                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
994                 if (ifp->if_flags & IFF_RUNNING) {
995                         IGB_CORE_LOCK(adapter);
996                         igb_disable_intr(adapter);
997                         igb_set_multi(adapter);
998 #ifdef DEVICE_POLLING
999                         if ((ifp->if_flags & IFF_POLLING) == 0)
1000 #endif
1001                                 igb_enable_intr(adapter);
1002                         IGB_CORE_UNLOCK(adapter);
1003                 }
1004                 break;
1005         case SIOCSIFMEDIA:
1006                 /* Check SOL/IDER usage */
1007                 IGB_CORE_LOCK(adapter);
1008                 if (e1000_check_reset_block(&adapter->hw)) {
1009                         IGB_CORE_UNLOCK(adapter);
1010                         device_printf(adapter->dev, "Media change is"
1011                             " blocked due to SOL/IDER session.\n");
1012                         break;
1013                 }
1014                 IGB_CORE_UNLOCK(adapter);
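                /* FALLTHROUGH - set and get share the ifmedia handler */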
1015         case SIOCGIFMEDIA:
1016                 IOCTL_DEBUGOUT(
1017                     "ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
1018                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1019                 break;
1020         case SIOCSIFCAP:
1021             {
1022                 int mask, reinit;
1023
1024                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1025                 reinit = 0;
1026                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1027 #ifdef DEVICE_POLLING
1028                 if (ifp->if_flags & IFF_POLLING) {
1029                         IGB_CORE_LOCK(adapter);
1030                         igb_disable_intr(adapter);
1031                         IGB_CORE_UNLOCK(adapter);
1032                 }
1033 #endif
1034                 if (mask & IFCAP_HWCSUM) {
1035                         ifp->if_capenable ^= IFCAP_HWCSUM;
1036                         reinit = 1;
1037                 }
1038 #ifdef NET_TSO 
1039                 if (mask & IFCAP_TSO4) {
1040                         ifp->if_capenable ^= IFCAP_TSO4;
1041                         reinit = 1;
1042                 }
1043 #endif
1044                 if (mask & IFCAP_VLAN_HWTAGGING) {
1045                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1046                         reinit = 1;
1047                 }
1048 #ifdef NET_LRO 
1049                 if (mask & IFCAP_LRO) {
1050                         ifp->if_capenable ^= IFCAP_LRO;
1051                         reinit = 1;
1052                 }
1053 #endif
1054                 if (reinit && (ifp->if_flags & IFF_RUNNING))
1055                         igb_init(adapter);
1056 #if 0
1057                 VLAN_CAPABILITIES(ifp);
1058 #endif
1059                 break;
1060             }
1061
1062         default:
1063                 error = ether_ioctl(ifp, command, data);
1064                 break;
1065         }
1066
1067         return (error);
1068 }
1069
1070
1071 /*********************************************************************
1072  *  Init entry point
1073  *
1074  *  This routine is used in two ways. It is used by the stack as
1075  *  the init entry point in the network interface structure. It is also used
1076  *  by the driver as a hw/sw initialization routine to get to a
1077  *  consistent state.
1078  *
1079  *  return 0 on success, positive on failure
1080  **********************************************************************/
1081
1082 static void
1083 igb_init_locked(struct adapter *adapter)
1084 {
1085         struct ifnet    *ifp = adapter->ifp;
1086         device_t        dev = adapter->dev;
1087
1088         INIT_DEBUGOUT("igb_init: begin");
1089
1090         IGB_CORE_LOCK_ASSERT(adapter);
1091
1092         igb_disable_intr(adapter);
1093         callout_stop(&adapter->timer);
1094
1095         /* Get the latest mac address, User can use a LAA */
1096         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1097               ETHER_ADDR_LEN);
1098
1099         /* Put the address into the Receive Address Array */
1100         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1101
1102         igb_reset(adapter);
1103         igb_update_link_status(adapter);
1104
1105         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1106
1107         /* Set hardware offload abilities */
1108         ifp->if_hwassist = 0;
1109         if (ifp->if_capenable & IFCAP_TXCSUM) {
1110                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1111 #if __FreeBSD_version >= 800000
1112                 if (adapter->hw.mac.type == e1000_82576)
1113                         ifp->if_hwassist |= CSUM_SCTP;
1114 #endif
1115         }
1116
1117 #ifdef NET_TSO
1118         if (ifp->if_capenable & IFCAP_TSO4)
1119                 ifp->if_hwassist |= CSUM_TSO;
1120 #endif
1121
1122         /* Configure for OS presence */
1123         igb_init_manageability(adapter);
1124
1125         /* Prepare transmit descriptors and buffers */
1126         igb_setup_transmit_structures(adapter);
1127         igb_initialize_transmit_units(adapter);
1128
1129         /* Setup Multicast table */
1130         igb_set_multi(adapter);
1131
1132         /*
1133         ** Figure out the desired mbuf pool
1134         ** for doing jumbo/packetsplit
1135         */
1136         if (ifp->if_mtu > ETHERMTU)
1137                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1138         else
1139                 adapter->rx_mbuf_sz = MCLBYTES;
1140
1141         /* Prepare receive descriptors and buffers */
1142         if (igb_setup_receive_structures(adapter)) {
1143                 device_printf(dev, "Could not setup receive structures\n");
1144                 return;
1145         }
1146         igb_initialize_receive_units(adapter);
1147
1148         /* Don't lose promiscuous settings */
1149         igb_set_promisc(adapter);
1150
1151         ifp->if_flags |= IFF_RUNNING;
1152         ifp->if_flags &= ~IFF_OACTIVE;
1153
1154         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1155         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1156
1157         if (adapter->msix > 1) /* Set up queue routing */
1158                 igb_configure_queues(adapter);
1159
1160         /* Set up VLAN tag offload and filter */
1161         igb_setup_vlan_hw_support(adapter);
1162
1163         /* this clears any pending interrupts */
1164         E1000_READ_REG(&adapter->hw, E1000_ICR);
1165 #ifdef DEVICE_POLLING
1166         /*
1167          * Only enable interrupts if we are not polling; make sure
1168          * they are off otherwise.
1169          */
1170         if (ifp->if_flags & IFF_POLLING)
1171                 igb_disable_intr(adapter);
1172         else
1173 #endif /* DEVICE_POLLING */
1174         {
1175                 igb_enable_intr(adapter);
1176                 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1177         }
1178
1179         /* Don't reset the phy next time init gets called */
1180         adapter->hw.phy.reset_disable = TRUE;
1181 }
1182
1183 static void
1184 igb_init(void *arg)
1185 {
1186         struct adapter *adapter = arg;
1187
1188         IGB_CORE_LOCK(adapter);
1189         igb_init_locked(adapter);
1190         IGB_CORE_UNLOCK(adapter);
1191 }
1192
1193
1194 static void
1195 igb_handle_rxtx(void *context, int pending)
1196 {
1197         struct adapter  *adapter = context;
1198         struct tx_ring  *txr = adapter->tx_rings;
1199         struct rx_ring  *rxr = adapter->rx_rings;
1200         struct ifnet    *ifp;
1201
1202         ifp = adapter->ifp;
1203
1204         if (ifp->if_flags & IFF_RUNNING) {
1205                 if (igb_rxeof(rxr, adapter->rx_process_limit))
1206                         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1207                 IGB_TX_LOCK(txr);
1208                 igb_txeof(txr);
1209
1210 #if __FreeBSD_version >= 800000
1211                 if (!drbr_empty(ifp, txr->br))
1212                         igb_mq_start_locked(ifp, txr, NULL);
1213 #else
1214                 if (!ifq_is_empty(&ifp->if_snd))
1215                         igb_start_locked(txr, ifp);
1216 #endif
1217                 IGB_TX_UNLOCK(txr);
1218         }
1219
1220         igb_enable_intr(adapter);
1221 }
1222
1223 static void
1224 igb_handle_que(void *context, int pending)
1225 {
1226         struct igb_queue *que = context;
1227         struct adapter *adapter = que->adapter;
1228         struct tx_ring *txr = que->txr;
1229         struct rx_ring *rxr = que->rxr;
1230         struct ifnet    *ifp = adapter->ifp;
1231         u32             loop = IGB_MAX_LOOP;
1232         bool            more;
1233
1234         /* RX first */
1235         do {
1236                 more = igb_rxeof(rxr, -1);
1237         } while (loop-- && more);
1238
1239         if (IGB_TX_TRYLOCK(txr)) {
1240                 loop = IGB_MAX_LOOP;
1241                 do {
1242                         more = igb_txeof(txr);
1243                 } while (loop-- && more);
1244 #if __FreeBSD_version >= 800000
1245                 igb_mq_start_locked(ifp, txr, NULL);
1246 #else
1247                 if (!ifq_is_empty(&ifp->if_snd))
1248                         igb_start_locked(txr, ifp);
1249 #endif
1250                 IGB_TX_UNLOCK(txr);
1251         }
1252
1253         /* Reenable this interrupt */
1254 #ifdef DEVICE_POLLING
1255         if ((ifp->if_flags & IFF_POLLING) == 0)
1256 #endif
1257                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1258 }
1259
1260 /* Deal with link in a sleepable context */
1261 static void
1262 igb_handle_link(void *context, int pending)
1263 {
1264         struct adapter *adapter = context;
1265
1266         adapter->hw.mac.get_link_status = 1;
1267         igb_update_link_status(adapter);
1268 }
1269
1270 /*********************************************************************
1271  *
1272  *  MSI/Legacy Deferred
1273  *  Interrupt Service routine  
1274  *
1275  *********************************************************************/
1276 #define FILTER_STRAY
1277 #define FILTER_HANDLED
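/*
 * Porting note: on FreeBSD igb_irq_fast() is an interrupt filter that
 * returns one of these values; here it runs as a plain void handler,
 * so both are defined empty and "return FILTER_*;" degenerates to a
 * bare return.
 */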
1278 static void
1279 igb_irq_fast(void *arg)
1280 {
1281         struct adapter  *adapter = arg;
1282         uint32_t        reg_icr;
1283
1284
1285         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1286
1287         /* Hot eject?  */
1288         if (reg_icr == 0xffffffff)
1289                 return FILTER_STRAY; 
1290
1291         /* Definitely not our interrupt.  */
1292         if (reg_icr == 0x0)
1293                 return FILTER_STRAY;
1294
1295         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1296                 return FILTER_STRAY;
1297
1298         /*
1299          * Mask interrupts until the taskqueue is finished running.  This is
1300          * cheap, just assume that it is needed.  This also works around the
1301          * MSI message reordering errata on certain systems.
1302          */
1303         igb_disable_intr(adapter);
1304         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1305
1306         /* Link status change */
1307         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1308                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1309
1310         if (reg_icr & E1000_ICR_RXO)
1311                 adapter->rx_overruns++;
1312         return FILTER_HANDLED;
1313 }
1314
1315 #ifdef DEVICE_POLLING
1316 /*********************************************************************
1317  *
1318  *  Legacy polling routine  
1319  *
1320  *********************************************************************/
1321 #if __FreeBSD_version >= 800000
1322 #define POLL_RETURN_COUNT(a) (a)
1323 static int
1324 #else
1325 #define POLL_RETURN_COUNT(a)
1326 static void
1327 #endif
1328 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1329 {
1330         struct adapter *adapter = ifp->if_softc;
1331         struct rx_ring  *rxr = adapter->rx_rings;
1332         struct tx_ring  *txr = adapter->tx_rings;
1333         u32             reg_icr, rx_done = 0;
1334         u32             loop = IGB_MAX_LOOP;
1335         bool            more;
1336
1337         IGB_CORE_LOCK(adapter);
1338         if ((ifp->if_flags & IFF_RUNNING) == 0) {
1339                 IGB_CORE_UNLOCK(adapter);
1340                 return POLL_RETURN_COUNT(rx_done);
1341         }
1342
1343         if (cmd == POLL_AND_CHECK_STATUS) {
1344                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1345                 /* Link status change */
1346                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1347                         taskqueue_enqueue(adapter->tq, &adapter->link_task);
1348
1349                 if (reg_icr & E1000_ICR_RXO)
1350                         adapter->rx_overruns++;
1351         }
1352         IGB_CORE_UNLOCK(adapter);
1353
1354         /* TODO: rx_count */
1355         rx_done = igb_rxeof(rxr, count) ? 1 : 0;
1356
1357         IGB_TX_LOCK(txr);
1358         do {
1359                 more = igb_txeof(txr);
1360         } while (loop-- && more);
1361 #if __FreeBSD_version >= 800000
1362         if (!drbr_empty(ifp, txr->br))
1363                 igb_mq_start_locked(ifp, txr, NULL);
1364 #else
1365         if (!ifq_is_empty(&ifp->if_snd))
1366                 igb_start_locked(txr, ifp);
1367 #endif
1368         IGB_TX_UNLOCK(txr);
1369         return POLL_RETURN_COUNT(rx_done);
1370 }
1371 #endif /* DEVICE_POLLING */
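/*
 * Usage sketch (assuming the interface is named igb0): polling is
 * compiled in with "options DEVICE_POLLING" and toggled per interface
 * at run time, e.g.:
 *
 *	ifconfig igb0 polling
 *	ifconfig igb0 -polling
 */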
1372
1373 /*********************************************************************
1374  *
1375  *  MSIX Queue Interrupt Service routine
1376  *
1377  **********************************************************************/
1378 static void
1379 igb_msix_que(void *arg)
1380 {
1381         struct igb_queue *que = arg;
1382         struct adapter *adapter = que->adapter;
1383         struct tx_ring *txr = que->txr;
1384         struct rx_ring *rxr = que->rxr;
1385         u32             newitr = 0;
1386         bool            more_tx, more_rx;
1387
1388         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1389         ++que->irqs;
1390
1391         IGB_TX_LOCK(txr);
1392         more_tx = igb_txeof(txr);
1393         IGB_TX_UNLOCK(txr);
1394
1395         more_rx = igb_rxeof(rxr, adapter->rx_process_limit);
1396
1397         if (igb_enable_aim == FALSE)
1398                 goto no_calc;
1399         /*
1400         ** Do Adaptive Interrupt Moderation:
1401         **  - Write out last calculated setting
1402         **  - Calculate based on average size over
1403         **    the last interval.
1404         */
1405         if (que->eitr_setting)
1406                 E1000_WRITE_REG(&adapter->hw,
1407                     E1000_EITR(que->msix), que->eitr_setting);
1408  
1409         que->eitr_setting = 0;
1410
1411         /* Idle, do nothing */
1412         if ((txr->bytes == 0) && (rxr->bytes == 0))
1413                 goto no_calc;
1414                                 
1415         /* Use half the default if sub-gig */
1416         if (adapter->link_speed != 1000)
1417                 newitr = IGB_DEFAULT_ITR / 2;
1418         else {
1419                 if ((txr->bytes) && (txr->packets))
1420                         newitr = txr->bytes/txr->packets;
1421                 if ((rxr->bytes) && (rxr->packets))
1422                         newitr = max(newitr,
1423                             (rxr->bytes / rxr->packets));
1424                 newitr += 24; /* account for hardware frame, crc */
1425                 /* set an upper boundary */
1426                 newitr = min(newitr, 3000);
1427                 /* Be nice to the mid range */
1428                 if ((newitr > 300) && (newitr < 1200))
1429                         newitr = (newitr / 3);
1430                 else
1431                         newitr = (newitr / 2);
1432         }
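        /*
         * Worked example with assumed traffic: an average frame of
         * 1024 bytes gives newitr = 1024 + 24 = 1048; that falls in
         * the (300, 1200) mid range, so it becomes 1048 / 3 = 349,
         * i.e. 348 once the low bits are masked off below.
         */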
1433         newitr &= 0x7FFC;  /* Mask invalid bits */
1434         if (adapter->hw.mac.type == e1000_82575)
1435                 newitr |= newitr << 16;
1436         else
1437                 newitr |= 0x8000000;
1438                  
1439         /* save for next interrupt */
1440         que->eitr_setting = newitr;
1441
1442         /* Reset state */
1443         txr->bytes = 0;
1444         txr->packets = 0;
1445         rxr->bytes = 0;
1446         rxr->packets = 0;
1447
1448 no_calc:
1449         /* Schedule a clean task if needed */
1450         if (more_tx || more_rx) 
1451                 taskqueue_enqueue(que->tq, &que->que_task);
1452         else
1453                 /* Reenable this interrupt */
1454                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1455         return;
1456 }
1457
1458
1459 /*********************************************************************
1460  *
1461  *  MSIX Link Interrupt Service routine
1462  *
1463  **********************************************************************/
1464
1465 static void
1466 igb_msix_link(void *arg)
1467 {
1468         struct adapter  *adapter = arg;
1469         u32             icr;
1470
1471         ++adapter->link_irq;
1472         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1473         if (!(icr & E1000_ICR_LSC))
1474                 goto spurious;
1475         taskqueue_enqueue(adapter->tq, &adapter->link_task);
1476
1477 spurious:
1478         /* Rearm */
1479         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1480         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1481         return;
1482 }
1483
1484
1485 /*********************************************************************
1486  *
1487  *  Media Ioctl callback
1488  *
1489  *  This routine is called whenever the user queries the status of
1490  *  the interface using ifconfig.
1491  *
1492  **********************************************************************/
1493 static void
1494 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1495 {
1496         struct adapter *adapter = ifp->if_softc;
1497         u_char fiber_type = IFM_1000_SX;
1498
1499         INIT_DEBUGOUT("igb_media_status: begin");
1500
1501         IGB_CORE_LOCK(adapter);
1502         igb_update_link_status(adapter);
1503
1504         ifmr->ifm_status = IFM_AVALID;
1505         ifmr->ifm_active = IFM_ETHER;
1506
1507         if (!adapter->link_active) {
1508                 IGB_CORE_UNLOCK(adapter);
1509                 return;
1510         }
1511
1512         ifmr->ifm_status |= IFM_ACTIVE;
1513
1514         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1515             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1516                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1517         else {
1518                 switch (adapter->link_speed) {
1519                 case 10:
1520                         ifmr->ifm_active |= IFM_10_T;
1521                         break;
1522                 case 100:
1523                         ifmr->ifm_active |= IFM_100_TX;
1524                         break;
1525                 case 1000:
1526                         ifmr->ifm_active |= IFM_1000_T;
1527                         break;
1528                 }
1529                 if (adapter->link_duplex == FULL_DUPLEX)
1530                         ifmr->ifm_active |= IFM_FDX;
1531                 else
1532                         ifmr->ifm_active |= IFM_HDX;
1533         }
1534         IGB_CORE_UNLOCK(adapter);
1535 }
1536
1537 /*********************************************************************
1538  *
1539  *  Media Ioctl callback
1540  *
1541  *  This routine is called when the user changes speed/duplex using
1542  *  media/mediaopt options with ifconfig.
1543  *
1544  **********************************************************************/
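/*
 * For example, a user request such as
 *     ifconfig igb0 media 100baseTX mediaopt full-duplex
 * (standard ifconfig syntax, not driver-specific) lands in the
 * IFM_100_TX case below with IFM_FDX set in the media word.
 */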
1545 static int
1546 igb_media_change(struct ifnet *ifp)
1547 {
1548         struct adapter *adapter = ifp->if_softc;
1549         struct ifmedia  *ifm = &adapter->media;
1550
1551         INIT_DEBUGOUT("igb_media_change: begin");
1552
1553         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1554                 return (EINVAL);
1555
1556         IGB_CORE_LOCK(adapter);
1557         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1558         case IFM_AUTO:
1559                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1560                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1561                 break;
1562         case IFM_1000_LX:
1563         case IFM_1000_SX:
1564         case IFM_1000_T:
1565                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1566                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1567                 break;
1568         case IFM_100_TX:
1569                 adapter->hw.mac.autoneg = FALSE;
1570                 adapter->hw.phy.autoneg_advertised = 0;
1571                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1572                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1573                 else
1574                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1575                 break;
1576         case IFM_10_T:
1577                 adapter->hw.mac.autoneg = FALSE;
1578                 adapter->hw.phy.autoneg_advertised = 0;
1579                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1580                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1581                 else
1582                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1583                 break;
1584         default:
1585                 device_printf(adapter->dev, "Unsupported media type\n");
1586         }
1587
1588         /* As the speed/duplex settings may have changed we need to
1589          * reset the PHY.
1590          */
1591         adapter->hw.phy.reset_disable = FALSE;
1592
1593         igb_init_locked(adapter);
1594         IGB_CORE_UNLOCK(adapter);
1595
1596         return (0);
1597 }
1598
1599
1600 /*********************************************************************
1601  *
1602  *  This routine maps the mbufs to the Advanced TX descriptors
1603  *  used by the 82575 adapter.
1604  *  
1605  **********************************************************************/
1606
1607 static int
1608 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1609 {
1610         struct adapter          *adapter = txr->adapter;
1611         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1612         bus_dmamap_t            map;
1613         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1614         union e1000_adv_tx_desc *txd = NULL;
1615         struct mbuf             *m_head;
1616         u32                     olinfo_status = 0, cmd_type_len = 0;
1617         int                     nsegs, i, j, error, first, last = 0;
1618         u32                     hdrlen = 0;
1619
1620         m_head = *m_headp;
1621
1622
1623         /* Set basic descriptor constants */
1624         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1625         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1626         if (m_head->m_flags & M_VLANTAG)
1627                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1628
1629         /*
1630          * Force a cleanup if number of TX descriptors
1631          * available hits the threshold
1632          */
1633         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1634                 igb_txeof(txr);
1635                 /* Do we now have at least the minimum? */
1636                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1637                         txr->no_desc_avail++;
1638                         return (ENOBUFS);
1639                 }
1640         }
1641
1642         /*
1643          * Map the packet for DMA.
1644          *
1645          * Capture the first descriptor index,
1646          * this descriptor will have the index
1647          * of the EOP which is the only one that
1648          * now gets a DONE bit writeback.
1649          */
1650         first = txr->next_avail_desc;
1651         tx_buffer = &txr->tx_buffers[first];
1652         tx_buffer_mapped = tx_buffer;
1653         map = tx_buffer->map;
1654
1655         error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1656             *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1657
1658         if (error == EFBIG) {
1659                 struct mbuf *m;
1660
1661                 m = m_defrag(*m_headp, MB_DONTWAIT);
1662                 if (m == NULL) {
1663                         adapter->mbuf_defrag_failed++;
1664                         m_freem(*m_headp);
1665                         *m_headp = NULL;
1666                         return (ENOBUFS);
1667                 }
1668                 *m_headp = m;
1669
1670                 /* Try it again */
1671                 error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1672                     *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1673
1674                 if (error == ENOMEM) {
1675                         adapter->no_tx_dma_setup++;
1676                         return (error);
1677                 } else if (error != 0) {
1678                         adapter->no_tx_dma_setup++;
1679                         m_freem(*m_headp);
1680                         *m_headp = NULL;
1681                         return (error);
1682                 }
1683         } else if (error == ENOMEM) {
1684                 adapter->no_tx_dma_setup++;
1685                 return (error);
1686         } else if (error != 0) {
1687                 adapter->no_tx_dma_setup++;
1688                 m_freem(*m_headp);
1689                 *m_headp = NULL;
1690                 return (error);
1691         }
1692
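        /*
        ** Note: the "- 2" in the check below keeps a couple of
        ** descriptors of slack; a ring must never be driven completely
        ** full, since head == tail would read back as empty.
        */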
1693         /* Check again to be sure we have enough descriptors */
1694         if (nsegs > (txr->tx_avail - 2)) {
1695                 txr->no_desc_avail++;
1696                 bus_dmamap_unload(txr->txtag, map);
1697                 return (ENOBUFS);
1698         }
1699         m_head = *m_headp;
1700
1701         /*
1702          * Set up the context descriptor:
1703          * used when any hardware offload is done.
1704          * This includes CSUM, VLAN, and TSO. It
1705          * will use the first descriptor.
1706          */
1707 #ifdef NET_TSO
1708         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1709                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1710                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1711                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1712                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1713                 } else
1714                         return (ENXIO); 
1715         } else
1716 #endif
1717                if (igb_tx_ctx_setup(txr, m_head))
1718                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1719
1720         /* Calculate payload length */
1721         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1722             << E1000_ADVTXD_PAYLEN_SHIFT);
1723
1724         /* 82575 needs the queue index added */
1725         if (adapter->hw.mac.type == e1000_82575)
1726                 olinfo_status |= txr->me << 4;
1727
1728         /* Set up our transmit descriptors */
1729         i = txr->next_avail_desc;
1730         for (j = 0; j < nsegs; j++) {
1731                 bus_size_t seg_len;
1732                 bus_addr_t seg_addr;
1733
1734                 tx_buffer = &txr->tx_buffers[i];
1735                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1736                 seg_addr = segs[j].ds_addr;
1737                 seg_len  = segs[j].ds_len;
1738
1739                 txd->read.buffer_addr = htole64(seg_addr);
1740                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1741                 txd->read.olinfo_status = htole32(olinfo_status);
1742                 last = i;
1743                 if (++i == adapter->num_tx_desc)
1744                         i = 0;
1745                 tx_buffer->m_head = NULL;
1746                 tx_buffer->next_eop = -1;
1747         }
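        /*
        ** At this point a packet of, say, nsegs == 3 occupies three
        ** consecutive descriptors, and 'last' indexes the final one;
        ** only that descriptor gets EOP|RS below, so the hardware
        ** reports DONE status for just the end of the packet.
        */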
1748
1749         txr->next_avail_desc = i;
1750         txr->tx_avail -= nsegs;
1751
1752         tx_buffer->m_head = m_head;
1753         tx_buffer_mapped->map = tx_buffer->map;
1754         tx_buffer->map = map;
1755         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1756
1757         /*
1758          * Last Descriptor of Packet
1759          * needs End Of Packet (EOP)
1760          * and Report Status (RS)
1761          */
1762         txd->read.cmd_type_len |=
1763             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1764         /*
1765          * Keep track in the first buffer which
1766          * descriptor will be written back
1767          */
1768         tx_buffer = &txr->tx_buffers[first];
1769         tx_buffer->next_eop = last;
1770         txr->watchdog_time = ticks;
1771
1772         /*
1773          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1774          * that this frame is available to transmit.
1775          */
1776         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1777             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1778         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1779         ++txr->tx_packets;
1780
1781         return (0);
1782
1783 }
1784
1785 static void
1786 igb_set_promisc(struct adapter *adapter)
1787 {
1788         struct ifnet    *ifp = adapter->ifp;
1789         uint32_t        reg_rctl;
1790
1791         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1792
1793         if (ifp->if_flags & IFF_PROMISC) {
1794                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1795                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1796         } else if (ifp->if_flags & IFF_ALLMULTI) {
1797                 reg_rctl |= E1000_RCTL_MPE;
1798                 reg_rctl &= ~E1000_RCTL_UPE;
1799                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1800         }
1801 }
1802
1803 static void
1804 igb_disable_promisc(struct adapter *adapter)
1805 {
1806         uint32_t        reg_rctl;
1807
1808         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1809
1810         reg_rctl &=  (~E1000_RCTL_UPE);
1811         reg_rctl &=  (~E1000_RCTL_MPE);
1812         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1813 }
1814
1815
1816 /*********************************************************************
1817  *  Multicast Update
1818  *
1819  *  This routine is called whenever multicast address list is updated.
1820  *
1821  **********************************************************************/
1822
1823 static void
1824 igb_set_multi(struct adapter *adapter)
1825 {
1826         struct ifnet    *ifp = adapter->ifp;
1827         struct ifmultiaddr *ifma;
1828         u32 reg_rctl = 0;
1829         u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1830
1831         int mcnt = 0;
1832
1833         IOCTL_DEBUGOUT("igb_set_multi: begin");
1834
1835 #if 0
1836 #if __FreeBSD_version < 800000
1837         IF_ADDR_LOCK(ifp);
1838 #else
1839         if_maddr_rlock(ifp);
1840 #endif
1841 #endif
1842
1843         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1844                 if (ifma->ifma_addr->sa_family != AF_LINK)
1845                         continue;
1846
1847                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1848                         break;
1849
1850                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1851                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1852                 mcnt++;
1853         }
1854 #if 0
1855 #if __FreeBSD_version < 800000
1856         IF_ADDR_UNLOCK(ifp);
1857 #else
1858         if_maddr_runlock(ifp);
1859 #endif
1860 #endif
1861
1862         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1863                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1864                 reg_rctl |= E1000_RCTL_MPE;
1865                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1866         } else
1867                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1868 }
1869
1870
1871 /*********************************************************************
1872  *  Timer routine:
1873  *      This routine checks for link status,
1874  *      updates statistics, and does the watchdog.
1875  *
1876  **********************************************************************/
1877
1878 static void
1879 igb_local_timer(void *arg)
1880 {
1881         struct adapter          *adapter = arg;
1882         struct ifnet            *ifp = adapter->ifp;
1883         device_t                dev = adapter->dev;
1884         struct tx_ring          *txr = adapter->tx_rings;
1885
1886         IGB_CORE_LOCK(adapter);
1887         IGB_CORE_LOCK_ASSERT(adapter);
1891
1892         igb_update_link_status(adapter);
1893         igb_update_stats_counters(adapter);
1894
1895         if (igb_display_debug_stats && (ifp->if_flags & IFF_RUNNING))
1896                 igb_print_hw_stats(adapter);
1897
1898         /*
1899         ** Watchdog: check for time since any descriptor was cleaned
1900         */
1901         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1902                 if (txr->watchdog_check == FALSE)
1903                         continue;
1904                 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1905                         goto timeout;
1906         }
1907
1908         /* Trigger an RX interrupt on all queues */
1909 #ifdef DEVICE_POLLING
1910         if ((ifp->if_flags & IFF_POLLING) == 0)
1911 #endif
1912                 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1913         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1914         IGB_CORE_UNLOCK(adapter);
1915         return;
1916
1917 timeout:
1918         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1919         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1920             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1921             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1922         device_printf(dev, "TX(%d) desc avail = %d, "
1923             "Next TX to Clean = %d\n",
1924             txr->me, txr->tx_avail, txr->next_to_clean);
1925         adapter->ifp->if_flags &= ~IFF_RUNNING;
1926         adapter->watchdog_events++;
1927         igb_init_locked(adapter);
1928         IGB_CORE_UNLOCK(adapter);
1929 }
1930
1931 static void
1932 igb_update_link_status(struct adapter *adapter)
1933 {
1934         struct e1000_hw *hw = &adapter->hw;
1935         struct ifnet *ifp = adapter->ifp;
1936         device_t dev = adapter->dev;
1937         struct tx_ring *txr = adapter->tx_rings;
1938         u32 link_check = 0;
1939
1940         /* Get the cached link value or read for real */
1941         switch (hw->phy.media_type) {
1942         case e1000_media_type_copper:
1943                 if (hw->mac.get_link_status) {
1944                         /* Do the work to read phy */
1945                         e1000_check_for_link(hw);
1946                         link_check = !hw->mac.get_link_status;
1947                 } else
1948                         link_check = TRUE;
1949                 break;
1950         case e1000_media_type_fiber:
1951                 e1000_check_for_link(hw);
1952                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1953                                  E1000_STATUS_LU);
1954                 break;
1955         case e1000_media_type_internal_serdes:
1956                 e1000_check_for_link(hw);
1957                 link_check = adapter->hw.mac.serdes_has_link;
1958                 break;
1959         default:
1960         case e1000_media_type_unknown:
1961                 break;
1962         }
1963
1964         /* Now we check if a transition has happened */
1965         if (link_check && (adapter->link_active == 0)) {
1966                 e1000_get_speed_and_duplex(&adapter->hw, 
1967                     &adapter->link_speed, &adapter->link_duplex);
1968                 if (bootverbose)
1969                         device_printf(dev, "Link is up %d Mbps %s\n",
1970                             adapter->link_speed,
1971                             ((adapter->link_duplex == FULL_DUPLEX) ?
1972                             "Full Duplex" : "Half Duplex"));
1973                 adapter->link_active = 1;
1974                 ifp->if_baudrate = adapter->link_speed * 1000000;
1975                 ifp->if_link_state = LINK_STATE_UP;
1976                 if_link_state_change(ifp);
1977         } else if (!link_check && (adapter->link_active == 1)) {
1978                 ifp->if_baudrate = adapter->link_speed = 0;
1979                 adapter->link_duplex = 0;
1980                 if (bootverbose)
1981                         device_printf(dev, "Link is Down\n");
1982                 adapter->link_active = 0;
1983                 ifp->if_link_state = LINK_STATE_DOWN;
1984                 if_link_state_change(ifp);
1985                 /* Turn off watchdogs */
1986                 for (int i = 0; i < adapter->num_queues; i++, txr++)
1987                         txr->watchdog_check = FALSE;
1988         }
1989 }
1990
1991 /*********************************************************************
1992  *
1993  *  This routine disables all traffic on the adapter by issuing a
1994  *  global reset on the MAC and deallocates TX/RX buffers.
1995  *
1996  **********************************************************************/
1997
1998 static void
1999 igb_stop(void *arg)
2000 {
2001         struct adapter  *adapter = arg;
2002         struct ifnet    *ifp = adapter->ifp;
2003         struct tx_ring *txr = adapter->tx_rings;
2004
2005         IGB_CORE_LOCK_ASSERT(adapter);
2006
2007         INIT_DEBUGOUT("igb_stop: begin");
2008
2009         igb_disable_intr(adapter);
2010
2011         callout_stop(&adapter->timer);
2012
2013         /* Tell the stack that the interface is no longer active */
2014         ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2015
2016         /* Unarm watchdog timer. */
2017         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2018                 IGB_TX_LOCK(txr);
2019                 txr->watchdog_check = FALSE;
2020                 IGB_TX_UNLOCK(txr);
2021         }
2022
2023         e1000_reset_hw(&adapter->hw);
2024         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2025 }
2026
2027
2028 /*********************************************************************
2029  *
2030  *  Determine hardware revision.
2031  *
2032  **********************************************************************/
2033 static void
2034 igb_identify_hardware(struct adapter *adapter)
2035 {
2036         device_t dev = adapter->dev;
2037
2038         /* Make sure our PCI config space has the necessary stuff set */
2039         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2040         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2041             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2042                 device_printf(dev, "Memory Access and/or Bus Master bits "
2043                     "were not set!\n");
2044                 adapter->hw.bus.pci_cmd_word |=
2045                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2046                 pci_write_config(dev, PCIR_COMMAND,
2047                     adapter->hw.bus.pci_cmd_word, 2);
2048         }
2049
2050         /* Save off the information about this board */
2051         adapter->hw.vendor_id = pci_get_vendor(dev);
2052         adapter->hw.device_id = pci_get_device(dev);
2053         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2054         adapter->hw.subsystem_vendor_id =
2055             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2056         adapter->hw.subsystem_device_id =
2057             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2058
2059         /* Do Shared Code Init and Setup */
2060         if (e1000_set_mac_type(&adapter->hw)) {
2061                 device_printf(dev, "Setup init failure\n");
2062                 return;
2063         }
2064 }
2065
2066 static int
2067 igb_allocate_pci_resources(struct adapter *adapter)
2068 {
2069         device_t        dev = adapter->dev;
2070         int             rid;
2071
2072         rid = PCIR_BAR(0);
2073         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2074             &rid, RF_ACTIVE);
2075         if (adapter->pci_mem == NULL) {
2076                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2077                 return (ENXIO);
2078         }
2079         adapter->osdep.mem_bus_space_tag =
2080             rman_get_bustag(adapter->pci_mem);
2081         adapter->osdep.mem_bus_space_handle =
2082             rman_get_bushandle(adapter->pci_mem);
2083         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2084
2085         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2086
2087         /* This will setup either MSI/X or MSI */
2088         adapter->msix = igb_setup_msix(adapter);
2089         adapter->hw.back = &adapter->osdep;
2090
2091         return (0);
2092 }
2093
2094 /*********************************************************************
2095  *
2096  *  Setup the Legacy or MSI Interrupt handler
2097  *
2098  **********************************************************************/
2099 static int
2100 igb_allocate_legacy(struct adapter *adapter)
2101 {
2102         device_t dev = adapter->dev;
2103         int error, rid = 0;
2104
2105         /* Turn off all interrupts */
2106         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2107
2108 #if 0
2109         /* MSI RID is 1 */
2110         if (adapter->msix == 1)
2111                 rid = 1;
2112 #endif
2113         rid = 0;
2114         /* We allocate a single interrupt resource */
2115         adapter->res = bus_alloc_resource_any(dev,
2116             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2117         if (adapter->res == NULL) {
2118                 device_printf(dev, "Unable to allocate bus resource: "
2119                     "interrupt\n");
2120                 return (ENXIO);
2121         }
2122
2123         /*
2124          * Try allocating a fast interrupt and the associated deferred
2125          * processing contexts.
2126          */
2127         TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2128         /* Make tasklet for deferred link handling */
2129         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2130         adapter->tq = taskqueue_create("igb_taskq", M_INTWAIT,
2131             taskqueue_thread_enqueue, &adapter->tq);
2132         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s taskq",
2133             device_get_nameunit(adapter->dev));
2134         if ((error = bus_setup_intr(dev, adapter->res,
2135             /*INTR_TYPE_NET |*/ INTR_MPSAFE, igb_irq_fast,
2136             adapter, &adapter->tag, NULL)) != 0) {
2137                 device_printf(dev, "Failed to register fast interrupt "
2138                             "handler: %d\n", error);
2139                 taskqueue_free(adapter->tq);
2140                 adapter->tq = NULL;
2141                 return (error);
2142         }
2143
2144         return (0);
2145 }
2146
2147
2148 /*********************************************************************
2149  *
2150  *  Setup the MSIX Queue Interrupt handlers: 
2151  *
2152  **********************************************************************/
2153 static int
2154 igb_allocate_msix(struct adapter *adapter)
2155 {
2156         device_t                dev = adapter->dev;
2157         struct igb_queue        *que = adapter->queues;
2158         int                     error, rid, vector = 0;
2159
2160
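        /*
        ** Note: SYS_RES_IRQ rid 0 is the legacy INTx line; MSI and
        ** MSI-X vectors are exposed as rids starting at 1, hence the
        ** "vector + 1" below.
        */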
2161         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2162                 rid = vector + 1;
2163                 que->res = bus_alloc_resource_any(dev,
2164                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2165                 if (que->res == NULL) {
2166                         device_printf(dev,
2167                             "Unable to allocate bus resource: "
2168                             "MSIX Queue Interrupt\n");
2169                         return (ENXIO);
2170                 }
2171                 error = bus_setup_intr(dev, que->res,
2172                     /*INTR_TYPE_NET |*/ INTR_MPSAFE, 
2173                     igb_msix_que, que, &que->tag, NULL);
2174                 if (error) {
2175                         que->res = NULL;
2176                         device_printf(dev, "Failed to register Queue handler\n");
2177                         return (error);
2178                 }
2179                 que->msix = vector;
2180                 if (adapter->hw.mac.type == e1000_82575)
2181                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2182                 else
2183                         que->eims = 1 << vector;
2184                 /*
2185                 ** Bind the msix vector, and thus the
2186                 ** rings to the corresponding cpu.
2187                 */
2188 #if 0
2189                 if (adapter->num_queues > 1)
2190                         bus_bind_intr(dev, que->res, i);
2191 #endif
2192                 /* Make tasklet for deferred handling */
2193                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2194                 que->tq = taskqueue_create("igb_que", M_INTWAIT,
2195                     taskqueue_thread_enqueue, &que->tq);
2196                 taskqueue_start_threads(&que->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s que",
2197                     device_get_nameunit(adapter->dev));
2198         }
2199
2200         /* And Link */
2201         rid = vector + 1;
2202         adapter->res = bus_alloc_resource_any(dev,
2203             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2204         if (adapter->res == NULL) {
2205                 device_printf(dev,
2206                     "Unable to allocate bus resource: "
2207                     "MSIX Link Interrupt\n");
2208                 return (ENXIO);
2209         }
2210         if ((error = bus_setup_intr(dev, adapter->res,
2211             /*INTR_TYPE_NET |*/ INTR_MPSAFE,
2212             igb_msix_link, adapter, &adapter->tag, NULL)) != 0) {
2213                 device_printf(dev, "Failed to register Link handler\n");
2214                 return (error);
2215         }
2216         adapter->linkvec = vector;
2217
2218         /* Make tasklet for deferred handling */
2219         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2220         adapter->tq = taskqueue_create("igb_link", M_INTWAIT,
2221             taskqueue_thread_enqueue, &adapter->tq);
2222         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s link",
2223             device_get_nameunit(adapter->dev));
2224
2225         return (0);
2226 }
2227
2228
2229 static void
2230 igb_configure_queues(struct adapter *adapter)
2231 {
2232         struct  e1000_hw        *hw = &adapter->hw;
2233         struct  igb_queue       *que;
2234         u32                     tmp, ivar = 0;
2235         u32                     newitr = IGB_DEFAULT_ITR;
2236
2237         /* First turn on RSS capability */
2238         if (adapter->hw.mac.type > e1000_82575)
2239                 E1000_WRITE_REG(hw, E1000_GPIE,
2240                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2241                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2242
2243         /* Turn on MSIX */
2244         switch (adapter->hw.mac.type) {
2245         case e1000_82580:
2246                 /* RX entries */
2247                 for (int i = 0; i < adapter->num_queues; i++) {
2248                         u32 index = i >> 1;
2249                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2250                         que = &adapter->queues[i];
2251                         if (i & 1) {
2252                                 ivar &= 0xFF00FFFF;
2253                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2254                         } else {
2255                                 ivar &= 0xFFFFFF00;
2256                                 ivar |= que->msix | E1000_IVAR_VALID;
2257                         }
2258                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2259                 }
2260                 /* TX entries */
2261                 for (int i = 0; i < adapter->num_queues; i++) {
2262                         u32 index = i >> 1;
2263                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2264                         que = &adapter->queues[i];
2265                         if (i & 1) {
2266                                 ivar &= 0x00FFFFFF;
2267                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2268                         } else {
2269                                 ivar &= 0xFFFF00FF;
2270                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2271                         }
2272                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2273                         adapter->eims_mask |= que->eims;
2274                 }
2275
2276                 /* And for the link interrupt */
2277                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2278                 adapter->link_mask = 1 << adapter->linkvec;
2279                 adapter->eims_mask |= adapter->link_mask;
2280                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2281                 break;
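                /*
                ** Layout note, as implemented above for the 82580: each
                ** 32-bit IVAR maps a pair of queues; byte 0 carries the
                ** even queue's RX vector, byte 1 its TX vector, and
                ** bytes 2/3 the odd queue's RX/TX vectors.
                */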
2282         case e1000_82576:
2283                 /* RX entries */
2284                 for (int i = 0; i < adapter->num_queues; i++) {
2285                         u32 index = i & 0x7; /* Each IVAR has two entries */
2286                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2287                         que = &adapter->queues[i];
2288                         if (i < 8) {
2289                                 ivar &= 0xFFFFFF00;
2290                                 ivar |= que->msix | E1000_IVAR_VALID;
2291                         } else {
2292                                 ivar &= 0xFF00FFFF;
2293                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2294                         }
2295                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2296                         adapter->eims_mask |= que->eims;
2297                 }
2298                 /* TX entries */
2299                 for (int i = 0; i < adapter->num_queues; i++) {
2300                         u32 index = i & 0x7; /* Each IVAR has two entries */
2301                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2302                         que = &adapter->queues[i];
2303                         if (i < 8) {
2304                                 ivar &= 0xFFFF00FF;
2305                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2306                         } else {
2307                                 ivar &= 0x00FFFFFF;
2308                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2309                         }
2310                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2311                         adapter->eims_mask |= que->eims;
2312                 }
2313
2314                 /* And for the link interrupt */
2315                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2316                 adapter->link_mask = 1 << adapter->linkvec;
2317                 adapter->eims_mask |= adapter->link_mask;
2318                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2319                 break;
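                /*
                ** Layout note, as implemented above for the 82576: IVAR
                ** index i (0-7) carries queue i in its low half (byte 0
                ** RX, byte 1 TX) and queue i + 8 in its high half
                ** (byte 2 RX, byte 3 TX).
                */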
2320
2321         case e1000_82575:
2322                 /* enable MSI-X support*/
2323                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2324                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2325                 /* Auto-Mask interrupts upon ICR read. */
2326                 tmp |= E1000_CTRL_EXT_EIAME;
2327                 tmp |= E1000_CTRL_EXT_IRCA;
2328                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2329
2330                 /* Queues */
2331                 for (int i = 0; i < adapter->num_queues; i++) {
2332                         que = &adapter->queues[i];
2333                         tmp = E1000_EICR_RX_QUEUE0 << i;
2334                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2335                         que->eims = tmp;
2336                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2337                             i, que->eims);
2338                         adapter->eims_mask |= que->eims;
2339                 }
2340
2341                 /* Link */
2342                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2343                     E1000_EIMS_OTHER);
2344                 adapter->link_mask |= E1000_EIMS_OTHER;
2345                 adapter->eims_mask |= adapter->link_mask;
2346                 /* FALLTHROUGH */
2346         default:
2347                 break;
2348         }
2349
2350         /* Set the starting interrupt rate */
2351         if (hw->mac.type == e1000_82575)
2352                 newitr |= newitr << 16;
2353         else
2354                 newitr |= 0x8000000;
2355
2356         for (int i = 0; i < adapter->num_queues; i++) {
2357                 que = &adapter->queues[i];
2358                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2359         }
2360
2361         return;
2362 }
2363
2364
2365 static void
2366 igb_free_pci_resources(struct adapter *adapter)
2367 {
2368         struct          igb_queue *que = adapter->queues;
2369         device_t        dev = adapter->dev;
2370         int             rid;
2371
2372         /*
2373         ** There is a slight possibility of a failure mode
2374         ** in attach that will result in entering this function
2375         ** before interrupt resources have been initialized, and
2376         ** in that case we do not want to execute the loops below.
2377         ** We can detect this reliably by the state of the adapter
2378         ** res pointer.
2379         */
2380         if (adapter->res == NULL)
2381                 goto mem;
2382
2383         /*
2384          * First release all the interrupt resources:
2385          */
2386         for (int i = 0; i < adapter->num_queues; i++, que++) {
2387                 rid = que->msix + 1;
2388                 if (que->tag != NULL) {
2389                         bus_teardown_intr(dev, que->res, que->tag);
2390                         que->tag = NULL;
2391                 }
2392                 if (que->res != NULL)
2393                         bus_release_resource(dev,
2394                             SYS_RES_IRQ, rid, que->res);
2395         }
2396
2397         /* Clean the Legacy or Link interrupt last */
2398         if (adapter->linkvec) /* we are doing MSIX */
2399                 rid = adapter->linkvec + 1;
2400         else
2401                 rid = (adapter->msix != 0) ? 1 : 0;
2402
2403         if (adapter->tag != NULL) {
2404                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2405                 adapter->tag = NULL;
2406         }
2407         if (adapter->res != NULL)
2408                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2409
2410 mem:
2411         if (adapter->msix)
2412                 pci_release_msi(dev);
2413
2414         if (adapter->msix_mem != NULL)
2415                 bus_release_resource(dev, SYS_RES_MEMORY,
2416                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2417
2418         if (adapter->pci_mem != NULL)
2419                 bus_release_resource(dev, SYS_RES_MEMORY,
2420                     PCIR_BAR(0), adapter->pci_mem);
2421
2422 }
2423
2424 /*
2425  * Setup Either MSI/X or MSI
2426  */
2427 static int
2428 igb_setup_msix(struct adapter *adapter)
2429 {
2430         device_t dev = adapter->dev;
2431         int rid, want, queues, msgs;
2432
2433         /* tuneable override */
2434         if (igb_enable_msix == 0)
2435                 goto msi;
2436
2437         /* First try MSI/X */
2438         rid = PCIR_BAR(IGB_MSIX_BAR);
2439         adapter->msix_mem = bus_alloc_resource_any(dev,
2440             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2441         if (!adapter->msix_mem) {
2442                 /* May not be enabled */
2443                 device_printf(adapter->dev,
2444                     "Unable to map MSIX table\n");
2445                 goto msi;
2446         }
2447
2448         msgs = pci_msix_count(dev); 
2449         if (msgs == 0) { /* system has msix disabled */
2450                 bus_release_resource(dev, SYS_RES_MEMORY,
2451                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2452                 adapter->msix_mem = NULL;
2453                 goto msi;
2454         }
2455
2456         /* Figure out a reasonable auto config value */
2457         queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2458
2459         /* Can have max of 4 queues on 82575 */
2460         if (adapter->hw.mac.type == e1000_82575) {
2461                 if (queues > 4)
2462                         queues = 4;
2463                 if (igb_num_queues > 4)
2464                         igb_num_queues = 4;
2465         }
2466
2467         if (igb_num_queues == 0)
2468                 igb_num_queues = queues;
2469
2470         /*
2471         ** One vector (RX/TX pair) per queue
2472         ** plus an additional for Link interrupt
2473         */
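        /* e.g. igb_num_queues == 4 wants 4 + 1 == 5 vectors */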
2474         want = igb_num_queues + 1;
2475         if (msgs >= want)
2476                 msgs = want;
2477         else {
2478                 device_printf(adapter->dev,
2479                     "MSIX Configuration Problem, "
2480                     "%d vectors configured, but %d queues wanted!\n",
2481                     msgs, want);
2482                 return (ENXIO);
2483         }
2484         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2485                 device_printf(adapter->dev,
2486                     "Using MSIX interrupts with %d vectors\n", msgs);
2487                 adapter->num_queues = igb_num_queues;
2488                 return (msgs);
2489         }
2490 msi:
2491         msgs = pci_msi_count(dev);
2492         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2493                 device_printf(adapter->dev,"Using MSI interrupt\n");
2494         return (msgs);
2495 }
2496
2497 /*********************************************************************
2498  *
2499  *  Set up a fresh starting state
2500  *
2501  **********************************************************************/
2502 static void
2503 igb_reset(struct adapter *adapter)
2504 {
2505         device_t        dev = adapter->dev;
2506         struct e1000_hw *hw = &adapter->hw;
2507         struct e1000_fc_info *fc = &hw->fc;
2508         struct ifnet    *ifp = adapter->ifp;
2509         u32             pba = 0;
2510         u16             hwm;
2511
2512         INIT_DEBUGOUT("igb_reset: begin");
2513
2514         /* Let the firmware know the OS is in control */
2515         igb_get_hw_control(adapter);
2516
2517         /*
2518          * Packet Buffer Allocation (PBA)
2519          * Writing PBA sets the receive portion of the buffer;
2520          * the remainder is used for the transmit buffer.
2521          */
2522         switch (hw->mac.type) {
2523         case e1000_82575:
2524                 pba = E1000_PBA_32K;
2525                 break;
2526         case e1000_82576:
2527                 pba = E1000_PBA_64K;
2528                 break;
2529         case e1000_82580:
2530                 pba = E1000_PBA_35K;    /* FALLTHROUGH */
2531         default:
2532                 break;
2533         }
2534
2535         /* Special needs in case of Jumbo frames */
2536         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2537                 u32 tx_space, min_tx, min_rx;
2538                 pba = E1000_READ_REG(hw, E1000_PBA);
2539                 tx_space = pba >> 16;
2540                 pba &= 0xffff;
2541                 min_tx = (adapter->max_frame_size +
2542                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2543                 min_tx = roundup2(min_tx, 1024);
2544                 min_tx >>= 10;
2545                 min_rx = adapter->max_frame_size;
2546                 min_rx = roundup2(min_rx, 1024);
2547                 min_rx >>= 10;
2548                 if (tx_space < min_tx &&
2549                     ((min_tx - tx_space) < pba)) {
2550                         pba = pba - (min_tx - tx_space);
2551                         /*
2552                          * if short on rx space, rx wins
2553                          * and must trump tx adjustment
2554                          */
2555                         if (pba < min_rx)
2556                                 pba = min_rx;
2557                 }
2558                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2559         }
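        /*
        ** Worked example for the carve-up above, with hypothetical
        ** numbers: for max_frame_size = 9234 and the 16-byte
        ** e1000_tx_desc, min_tx = (9234 + 16 - 4) * 2 = 18492, which
        ** rounds up to 19456 bytes, i.e. 19KB; min_rx = 9234 rounds
        ** up to 10KB.
        */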
2560
2561         INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2562
2563         /*
2564          * These parameters control the automatic generation (Tx) and
2565          * response (Rx) to Ethernet PAUSE frames.
2566          * - High water mark should allow for at least two frames to be
2567          *   received after sending an XOFF.
2568          * - Low water mark works best when it is very near the high water mark.
2569          *   This allows the receiver to restart by sending XON when it has
2570          *   drained a bit.
2571          */
2572         hwm = min(((pba << 10) * 9 / 10),
2573             ((pba << 10) - 2 * adapter->max_frame_size));
2574
2575         if (hw->mac.type < e1000_82576) {
2576                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2577                 fc->low_water = fc->high_water - 8;
2578         } else {
2579                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2580                 fc->low_water = fc->high_water - 16;
2581         }
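        /*
        ** Example with hypothetical numbers: on an 82576 (pba = 64K)
        ** and a 1522-byte max frame, hwm = min(65536 * 9 / 10,
        ** 65536 - 2 * 1522) = 58982; high_water = 58982 & 0xFFF0 =
        ** 58976 and low_water = 58960.
        */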
2582
2583         fc->pause_time = IGB_FC_PAUSE_TIME;
2584         fc->send_xon = TRUE;
2585
2586         /* Set flow control, using the tunable value if it is sane */
2587         if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2588                 fc->requested_mode = igb_fc_setting;
2589         else
2590                 fc->requested_mode = e1000_fc_none;
2591
2592         fc->current_mode = fc->requested_mode;
2593
2594         /* Issue a global reset */
2595         e1000_reset_hw(hw);
2596         E1000_WRITE_REG(hw, E1000_WUC, 0);
2597
2598         if (e1000_init_hw(hw) < 0)
2599                 device_printf(dev, "Hardware Initialization Failed\n");
2600
2601         if (hw->mac.type == e1000_82580) {
2602                 u32 reg;
2603
2604                 hwm = (pba << 10) - (2 * adapter->max_frame_size);
2605                 /*
2606                  * 0x80000000 - enable DMA COAL
2607                  * 0x10000000 - use L0s as low power
2608                  * 0x20000000 - use L1 as low power
2609                  * X << 16 - exit dma coal when rx data exceeds X kB
2610                  * Y - upper limit to stay in dma coal in units of 32usecs
2611                  */
2612                 E1000_WRITE_REG(hw, E1000_DMACR,
2613                     0xA0000006 | ((hwm << 6) & 0x00FF0000));
2614
2615                 /* set hwm to PBA -  2 * max frame size */
2616                 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2617                 /*
2618                  * This sets the time to wait before requesting transition to
2619                  * low power state to number of usecs needed to receive 1 512
2620                  * byte frame at gigabit line rate
2621                  */
2622                 E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2623
2624                 /* free space in tx packet buffer to wake from DMA coal */
2625                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2626                     (20480 - (2 * adapter->max_frame_size)) >> 6);
2627
2628                 /* make low power state decision controlled by DMA coal */
2629                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2630                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2631                     reg | E1000_PCIEMISC_LX_DECISION);
2632         }
2633
2634         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2635         e1000_get_phy_info(hw);
2636         e1000_check_for_link(hw);
2637         return;
2638 }
2639
2640 /*********************************************************************
2641  *
2642  *  Setup networking device structure and register an interface.
2643  *
2644  **********************************************************************/
2645 static void
2646 igb_setup_interface(device_t dev, struct adapter *adapter)
2647 {
2648         struct ifnet   *ifp;
2649
2650         INIT_DEBUGOUT("igb_setup_interface: begin");
2651
2652         ifp = adapter->ifp = &adapter->arpcom.ac_if;
2653         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2654         ifp->if_mtu = ETHERMTU;
2655         ifp->if_init =  igb_init;
2656         ifp->if_softc = adapter;
2657         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2658         ifp->if_ioctl = igb_ioctl;
2659         ifp->if_start = igb_start;
2660 #ifdef DEVICE_POLLING
2661         ifp->if_poll = igb_poll;
2662 #endif
2663 #if __FreeBSD_version >= 800000
2664         ifp->if_transmit = igb_mq_start;
2665         ifp->if_qflush = igb_qflush;
2666 #endif
2667         ifq_set_maxlen(&ifp->if_snd, adapter->num_tx_desc - 1);
2668         ifq_set_ready(&ifp->if_snd);
2669
2670         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2671
2672         ifp->if_capabilities = ifp->if_capenable = 0;
2673
2674         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2675 #ifdef NET_TSO
2676         ifp->if_capabilities |= IFCAP_TSO4;
2677 #endif
2678         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2679 #ifdef NET_LRO
2680         if (igb_header_split)
2681                 ifp->if_capabilities |= IFCAP_LRO;
2682 #endif
2683
2684         ifp->if_capenable = ifp->if_capabilities;
2685
2686         /*
2687          * Tell the upper layer(s) we support long frames.
2688          */
2689         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2690         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2691         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2692
2693         /*
2694          * Specify the media types supported by this adapter and register
2695          * callbacks to update media and link information
2696          */
2697         ifmedia_init(&adapter->media, IFM_IMASK,
2698             igb_media_change, igb_media_status);
2699         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2700             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2701                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2702                             0, NULL);
2703                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2704         } else {
2705                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2706                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2707                             0, NULL);
2708                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2709                             0, NULL);
2710                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2711                             0, NULL);
2712                 if (adapter->hw.phy.type != e1000_phy_ife) {
2713                         ifmedia_add(&adapter->media,
2714                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2715                         ifmedia_add(&adapter->media,
2716                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2717                 }
2718         }
2719         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2720         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2721 }
2722
2723
2724 /*
2725  * Manage DMA'able memory.
2726  */
2727 static void
2728 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2729 {
2730         if (error)
2731                 return;
2732         *(bus_addr_t *) arg = segs[0].ds_addr;
2733 }
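/*
 * Note: the tag created in igb_dma_malloc() below allows exactly one
 * segment, so the callback only needs segs[0].ds_addr; and since the
 * load is done with BUS_DMA_NOWAIT, the callback runs synchronously
 * before bus_dmamap_load() returns.
 */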
2734
2735 static int
2736 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2737         struct igb_dma_alloc *dma, int mapflags)
2738 {
2739         int error;
2740
2741         error = bus_dma_tag_create(NULL,                /* parent */
2742                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2743                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2744                                 BUS_SPACE_MAXADDR,      /* highaddr */
2745                                 NULL, NULL,             /* filter, filterarg */
2746                                 size,                   /* maxsize */
2747                                 1,                      /* nsegments */
2748                                 size,                   /* maxsegsize */
2749                                 0,                      /* flags */
2750                                 &dma->dma_tag);
2751         if (error) {
2752                 device_printf(adapter->dev,
2753                     "%s: bus_dma_tag_create failed: %d\n",
2754                     __func__, error);
2755                 goto fail_0;
2756         }
2757
2758         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2759             BUS_DMA_NOWAIT, &dma->dma_map);
2760         if (error) {
2761                 device_printf(adapter->dev,
2762                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2763                     __func__, (uintmax_t)size, error);
2764                 goto fail_2;
2765         }
2766
2767         dma->dma_paddr = 0;
2768         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2769             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2770         if (error || dma->dma_paddr == 0) {
2771                 device_printf(adapter->dev,
2772                     "%s: bus_dmamap_load failed: %d\n",
2773                     __func__, error);
2774                 goto fail_3;
2775         }
2776
2777         return (0);
2778
2779 fail_3:
2780         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2781         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2782 fail_2:         /* tag was created, but no memory was allocated */
2783         bus_dma_tag_destroy(dma->dma_tag);
2784 fail_0:
2785         dma->dma_map = NULL;
2786         dma->dma_tag = NULL;
2787
2788         return (error);
2789 }
2790
2791 static void
2792 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2793 {
2794         if (dma->dma_tag == NULL)
2795                 return;
2796         if (dma->dma_map != NULL) {
2797                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2798                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2799                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2800                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2801                 dma->dma_map = NULL;
2802         }
2803         bus_dma_tag_destroy(dma->dma_tag);
2804         dma->dma_tag = NULL;
2805 }
2806
2807
2808 /*********************************************************************
2809  *
2810  *  Allocate memory for the transmit and receive rings, and then
2811  *  the descriptors associated with each, called only once at attach.
2812  *
2813  **********************************************************************/
2814 static int
2815 igb_allocate_queues(struct adapter *adapter)
2816 {
2817         device_t dev = adapter->dev;
2818         struct igb_queue        *que = NULL;
2819         struct tx_ring          *txr = NULL;
2820         struct rx_ring          *rxr = NULL;
2821         int rsize, tsize, error = E1000_SUCCESS;
2822         int txconf = 0, rxconf = 0;
2823
2824         /* First allocate the top level queue structs */
2825         if (!(adapter->queues =
2826             (struct igb_queue *) kmalloc(sizeof(struct igb_queue) *
2827             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2828                 device_printf(dev, "Unable to allocate queue memory\n");
2829                 error = ENOMEM;
2830                 goto fail;
2831         }
2832
2833         /* Next allocate the TX ring struct memory */
2834         if (!(adapter->tx_rings =
2835             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2836             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2837                 device_printf(dev, "Unable to allocate TX ring memory\n");
2838                 error = ENOMEM;
2839                 goto tx_fail;
2840         }
2841
2842         /* Now allocate the RX */
2843         if (!(adapter->rx_rings =
2844             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2845             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2846                 device_printf(dev, "Unable to allocate RX ring memory\n");
2847                 error = ENOMEM;
2848                 goto rx_fail;
2849         }
2850
2851         tsize = roundup2(adapter->num_tx_desc *
2852             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2853         /*
2854          * Now set up the TX queues, txconf is needed to handle the
2855          * possibility that things fail midcourse and we need to
2856          * undo memory gracefully
2857          */ 
2858         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2859                 /* Set up some basics */
2860                 txr = &adapter->tx_rings[i];
2861                 txr->adapter = adapter;
2862                 txr->me = i;
2863
2864                 /* Initialize the TX lock */
2865                 ksnprintf(txr->spin_name, sizeof(txr->spin_name), "%s:tx(%d)",
2866                     device_get_nameunit(dev), txr->me);
2867
2868                 IGB_TX_LOCK_INIT(txr);
2869
2870                 if (igb_dma_malloc(adapter, tsize,
2871                         &txr->txdma, BUS_DMA_NOWAIT)) {
2872                         device_printf(dev,
2873                             "Unable to allocate TX Descriptor memory\n");
2874                         error = ENOMEM;
2875                         goto err_tx_desc;
2876                 }
2877                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2878                 bzero((void *)txr->tx_base, tsize);
2879
2880                 /* Now allocate transmit buffers for the ring */
2881                 if (igb_allocate_transmit_buffers(txr)) {
2882                         device_printf(dev,
2883                             "Critical Failure setting up transmit buffers\n");
2884                         error = ENOMEM;
2885                         goto err_tx_desc;
2886                 }
2887 #if __FreeBSD_version >= 800000
2888                 /* Allocate a buf ring */
2889                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2890                     M_WAITOK, &txr->tx_mtx);
2891 #endif
2892         }
2893
2894         /*
2895          * Next the RX queues...
2896          */ 
2897         rsize = roundup2(adapter->num_rx_desc *
2898             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2899         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2900                 rxr = &adapter->rx_rings[i];
2901                 rxr->adapter = adapter;
2902                 rxr->me = i;
2903
2904                 /* Initialize the RX lock */
2905                 ksnprintf(rxr->spin_name, sizeof(rxr->spin_name), "%s:rx(%d)",
2906                     device_get_nameunit(dev), rxr->me);
2907
2908                 IGB_RX_LOCK_INIT(rxr);
2909
2910                 if (igb_dma_malloc(adapter, rsize,
2911                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2912                         device_printf(dev,
2913                             "Unable to allocate RX Descriptor memory\n");
2914                         error = ENOMEM;
2915                         goto err_rx_desc;
2916                 }
2917                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2918                 bzero((void *)rxr->rx_base, rsize);
2919
2920                 /* Allocate receive buffers for the ring */
2921                 if (igb_allocate_receive_buffers(rxr)) {
2922                         device_printf(dev,
2923                             "Critical Failure setting up receive buffers\n");
2924                         error = ENOMEM;
2925                         goto err_rx_desc;
2926                 }
2927         }
2928
2929         /*
2930         ** Finally set up the queue holding structs
2931         */
2932         for (int i = 0; i < adapter->num_queues; i++) {
2933                 que = &adapter->queues[i];
2934                 que->adapter = adapter;
2935                 que->txr = &adapter->tx_rings[i];
2936                 que->rxr = &adapter->rx_rings[i];
2937         }
2938
2939         return (0);
2940
2941 err_rx_desc:
2942         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2943                 igb_dma_free(adapter, &rxr->rxdma);
2944 err_tx_desc:
2945         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2946                 igb_dma_free(adapter, &txr->txdma);
2947         kfree(adapter->rx_rings, M_DEVBUF);
2948 rx_fail:
2949 #if __FreeBSD_version >= 800000
2950         buf_ring_free(txr->br, M_DEVBUF);
2951 #endif
2952         kfree(adapter->tx_rings, M_DEVBUF);
2953 tx_fail:
2954         kfree(adapter->queues, M_DEVBUF);
2955 fail:
2956         return (error);
2957 }
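
/*
 * Editor's note: a minimal, hypothetical sketch of the counter-based
 * unwind idiom igb_allocate_queues() uses above, kept under #if 0.
 * All names here (ex_ring, ex_ring_init/ex_ring_fini) are illustrative
 * only and not part of this driver; the point is that 'conf' records
 * exactly how many rings completed, so the error path frees that many
 * and no more.
 */
#if 0
struct ex_ring { void *res; };          /* illustrative placeholder type */
extern int  ex_ring_init(struct ex_ring *);
extern void ex_ring_fini(struct ex_ring *);

static int
ex_alloc_rings(struct ex_ring *rings, int nring)
{
        int conf, error = 0;

        for (conf = 0; conf < nring; conf++) {
                if ((error = ex_ring_init(&rings[conf])) != 0)
                        goto unwind;
        }
        return (0);
unwind:
        /* rings[0 .. conf-1] succeeded; free those and no more */
        while (conf-- > 0)
                ex_ring_fini(&rings[conf]);
        return (error);
}
#endif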
2958
2959 /*********************************************************************
2960  *
2961  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2962  *  the information needed to transmit a packet on the wire. This is
2963  *  called only once at attach, setup is done every reset.
2964  *  called only once at attach; setup is done on every reset.
2965  **********************************************************************/
2966 static int
2967 igb_allocate_transmit_buffers(struct tx_ring *txr)
2968 {
2969         struct adapter *adapter = txr->adapter;
2970         device_t dev = adapter->dev;
2971         struct igb_tx_buffer *txbuf;
2972         int error, i;
2973
2974         /*
2975          * Setup DMA descriptor areas.
2976          */
2977         if ((error = bus_dma_tag_create(NULL,
2978                                1, 0,                    /* alignment, bounds */
2979                                BUS_SPACE_MAXADDR,       /* lowaddr */
2980                                BUS_SPACE_MAXADDR,       /* highaddr */
2981                                NULL, NULL,              /* filter, filterarg */
2982                                IGB_TSO_SIZE,            /* maxsize */
2983                                IGB_MAX_SCATTER,         /* nsegments */
2984                                PAGE_SIZE,               /* maxsegsize */
2985                                0,                       /* flags */
2986                                &txr->txtag))) {
2987                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2988                 goto fail;
2989         }
2990
2991         if (!(txr->tx_buffers =
2992             (struct igb_tx_buffer *) kmalloc(sizeof(struct igb_tx_buffer) *
2993             adapter->num_tx_desc, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2994                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2995                 error = ENOMEM;
2996                 goto fail;
2997         }
2998
2999         /* Create the descriptor buffer dma maps */
3000         txbuf = txr->tx_buffers;
3001         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3002                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3003                 if (error != 0) {
3004                         device_printf(dev, "Unable to create TX DMA map\n");
3005                         goto fail;
3006                 }
3007         }
3008
3009         return 0;
3010 fail:
3011         /* Free everything; this handles a failure partway through setup */
3012         igb_free_transmit_structures(adapter);
3013         return (error);
3014 }
3015
3016 /*********************************************************************
3017  *
3018  *  Initialize a transmit ring.
3019  *
3020  **********************************************************************/
3021 static void
3022 igb_setup_transmit_ring(struct tx_ring *txr)
3023 {
3024         struct adapter *adapter = txr->adapter;
3025         struct igb_tx_buffer *txbuf;
3026         int i;
3027
3028         /* Clear the old descriptor contents */
3029         IGB_TX_LOCK(txr);
3030         bzero((void *)txr->tx_base,
3031               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3032         /* Reset indices */
3033         txr->next_avail_desc = 0;
3034         txr->next_to_clean = 0;
3035
3036         /* Free any existing tx buffers. */
3037         txbuf = txr->tx_buffers;
3038         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3039                 if (txbuf->m_head != NULL) {
3040                         bus_dmamap_sync(txr->txtag, txbuf->map,
3041                             BUS_DMASYNC_POSTWRITE);
3042                         bus_dmamap_unload(txr->txtag, txbuf->map);
3043                         m_freem(txbuf->m_head);
3044                         txbuf->m_head = NULL;
3045                 }
3046                 /* clear the watch index */
3047                 txbuf->next_eop = -1;
3048         }
3049
3050         /* Set number of descriptors available */
3051         txr->tx_avail = adapter->num_tx_desc;
3052
3053         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3054             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3055         IGB_TX_UNLOCK(txr);
3056 }
3057
3058 /*********************************************************************
3059  *
3060  *  Initialize all transmit rings.
3061  *
3062  **********************************************************************/
3063 static void
3064 igb_setup_transmit_structures(struct adapter *adapter)
3065 {
3066         struct tx_ring *txr = adapter->tx_rings;
3067
3068         for (int i = 0; i < adapter->num_queues; i++, txr++)
3069                 igb_setup_transmit_ring(txr);
3070
3071         return;
3072 }
3073
3074 /*********************************************************************
3075  *
3076  *  Enable transmit unit.
3077  *
3078  **********************************************************************/
3079 static void
3080 igb_initialize_transmit_units(struct adapter *adapter)
3081 {
3082         struct tx_ring  *txr = adapter->tx_rings;
3083         struct e1000_hw *hw = &adapter->hw;
3084         u32             tctl, txdctl;
3085
3086         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3087
3088         /* Setup the Tx Descriptor Rings */
3089         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3090                 u64 bus_addr = txr->txdma.dma_paddr;
3091
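                /*
                 * Note: sizeof(struct e1000_tx_desc) equals
                 * sizeof(union e1000_adv_tx_desc) (both 16 bytes), so
                 * this TDLEN matches the memory allocated above with
                 * the advanced descriptor type.
                 */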
3092                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3093                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3094                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3095                     (uint32_t)(bus_addr >> 32));
3096                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3097                     (uint32_t)bus_addr);
3098
3099                 /* Setup the HW Tx Head and Tail descriptor pointers */
3100                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3101                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3102
3103                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3104                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3105                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3106
3107                 txr->watchdog_check = FALSE;
3108
3109                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3110                 txdctl |= IGB_TX_PTHRESH;
3111                 txdctl |= IGB_TX_HTHRESH << 8;
3112                 txdctl |= IGB_TX_WTHRESH << 16;
3113                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3114                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3115         }
3116
3117         /* Program the Transmit Control Register */
3118         tctl = E1000_READ_REG(hw, E1000_TCTL);
3119         tctl &= ~E1000_TCTL_CT;
3120         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3121                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3122
3123         e1000_config_collision_dist(hw);
3124
3125         /* This write will effectively turn on the transmit unit. */
3126         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3127 }
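
/*
 * Editor's note: the TXDCTL write above packs three threshold fields
 * into one register: PTHRESH in the low bits, HTHRESH starting at
 * bit 8 and WTHRESH starting at bit 16.  A hedged sketch of the same
 * packing with explicit shifts; the EX_* names are illustrative and
 * not part of the e1000 API.
 */
#if 0
#define EX_TXDCTL_PTHRESH_SHIFT 0
#define EX_TXDCTL_HTHRESH_SHIFT 8
#define EX_TXDCTL_WTHRESH_SHIFT 16

static inline u32
ex_pack_txdctl(u32 txdctl, u8 pthresh, u8 hthresh, u8 wthresh)
{
        txdctl |= (u32)pthresh << EX_TXDCTL_PTHRESH_SHIFT;
        txdctl |= (u32)hthresh << EX_TXDCTL_HTHRESH_SHIFT;
        txdctl |= (u32)wthresh << EX_TXDCTL_WTHRESH_SHIFT;
        return (txdctl | E1000_TXDCTL_QUEUE_ENABLE);
}
#endif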
3128
3129 /*********************************************************************
3130  *
3131  *  Free all transmit rings.
3132  *
3133  **********************************************************************/
3134 static void
3135 igb_free_transmit_structures(struct adapter *adapter)
3136 {
3137         struct tx_ring *txr = adapter->tx_rings;
3138
3139         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3140                 IGB_TX_LOCK(txr);
3141                 igb_free_transmit_buffers(txr);
3142                 igb_dma_free(adapter, &txr->txdma);
3143                 IGB_TX_UNLOCK(txr);
3144                 IGB_TX_LOCK_DESTROY(txr);
3145         }
3146         kfree(adapter->tx_rings, M_DEVBUF);
3147 }
3148
3149 /*********************************************************************
3150  *
3151  *  Free transmit ring related data structures.
3152  *
3153  **********************************************************************/
3154 static void
3155 igb_free_transmit_buffers(struct tx_ring *txr)
3156 {
3157         struct adapter *adapter = txr->adapter;
3158         struct igb_tx_buffer *tx_buffer;
3159         int             i;
3160
3161         INIT_DEBUGOUT("free_transmit_ring: begin");
3162
3163         if (txr->tx_buffers == NULL)
3164                 return;
3165
3166         tx_buffer = txr->tx_buffers;
3167         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3168                 if (tx_buffer->m_head != NULL) {
3169                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3170                             BUS_DMASYNC_POSTWRITE);
3171                         bus_dmamap_unload(txr->txtag,
3172                             tx_buffer->map);
3173                         m_freem(tx_buffer->m_head);
3174                         tx_buffer->m_head = NULL;
3175                         if (tx_buffer->map != NULL) {
3176                                 bus_dmamap_destroy(txr->txtag,
3177                                     tx_buffer->map);
3178                                 tx_buffer->map = NULL;
3179                         }
3180                 } else if (tx_buffer->map != NULL) {
3181                         bus_dmamap_unload(txr->txtag,
3182                             tx_buffer->map);
3183                         bus_dmamap_destroy(txr->txtag,
3184                             tx_buffer->map);
3185                         tx_buffer->map = NULL;
3186                 }
3187         }
3188 #if __FreeBSD_version >= 800000
3189         if (txr->br != NULL)
3190                 buf_ring_free(txr->br, M_DEVBUF);
3191 #endif
3192         if (txr->tx_buffers != NULL) {
3193                 kfree(txr->tx_buffers, M_DEVBUF);
3194                 txr->tx_buffers = NULL;
3195         }
3196         if (txr->txtag != NULL) {
3197                 bus_dma_tag_destroy(txr->txtag);
3198                 txr->txtag = NULL;
3199         }
3200         return;
3201 }
3202
3203 /**********************************************************************
3204  *
3205  *  Setup work for hardware segmentation offload (TSO)
3206  *
3207  **********************************************************************/
3208 #ifdef NET_TSO 
3209 static boolean_t
3210 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3211 {
3212         struct adapter *adapter = txr->adapter;
3213         struct e1000_adv_tx_context_desc *TXD;
3214         struct igb_tx_buffer        *tx_buffer;
3215         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3216         u32 mss_l4len_idx = 0;
3217         u16 vtag = 0;
3218         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3219         struct ether_vlan_header *eh;
3220         struct ip *ip;
3221         struct tcphdr *th;
3222
3223
3224         /*
3225          * Determine where frame payload starts.
3226          * Jump over vlan headers if already present
3227          */
3228         eh = mtod(mp, struct ether_vlan_header *);
3229         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3230                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3231         else
3232                 ehdrlen = ETHER_HDR_LEN;
3233
3234         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3235         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3236                 return FALSE;
3237
3238         /* Only supports IPV4 for now */
3239         ctxd = txr->next_avail_desc;
3240         tx_buffer = &txr->tx_buffers[ctxd];
3241         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3242
3243         ip = (struct ip *)(mp->m_data + ehdrlen);
3244         if (ip->ip_p != IPPROTO_TCP)
3245                 return FALSE;
3246         ip->ip_sum = 0;
3247         ip_hlen = ip->ip_hl << 2;
3248         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3249         th->th_sum = in_pseudo(ip->ip_src.s_addr,
3250             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3251         tcp_hlen = th->th_off << 2;
3252         /*
3253          * Calculate the total header length; it is used in the
3254          * transmit descriptor in igb_xmit (worked example below).
3255          */
3256         *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3257
3258         /* VLAN MACLEN IPLEN */
3259         if (mp->m_flags & M_VLANTAG) {
3260                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3261                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3262         }
3263
3264         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3265         vlan_macip_lens |= ip_hlen;
3266         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3267
3268         /* ADV DTYPE TUCMD */
3269         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3270         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3271         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3272         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3273
3274         /* MSS L4LEN IDX */
3275         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3276         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3277         /* 82575 needs the queue index added */
3278         if (adapter->hw.mac.type == e1000_82575)
3279                 mss_l4len_idx |= txr->me << 4;
3280         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3281
3282         TXD->seqnum_seed = htole32(0);
3283         tx_buffer->m_head = NULL;
3284         tx_buffer->next_eop = -1;
3285
3286         if (++ctxd == adapter->num_tx_desc)
3287                 ctxd = 0;
3288
3289         txr->tx_avail--;
3290         txr->next_avail_desc = ctxd;
3291         return TRUE;
3292 }
3293 #endif
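
/*
 * Editor's note: a worked example of the *hdrlen computation in
 * igb_tso_setup() above, assuming an untagged IPv4/TCP frame with no
 * IP or TCP options: ehdrlen = ETHER_HDR_LEN = 14, ip_hlen =
 * 5 << 2 = 20 and tcp_hlen = 5 << 2 = 20, so *hdrlen = 14 + 20 + 20
 * = 54 bytes of header that igb_xmit accounts for separately from
 * the TSO payload.
 */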
3294
3295 /*********************************************************************
3296  *
3297  *  Context Descriptor setup for VLAN or CSUM
3298  *
3299  **********************************************************************/
3300
3301 static bool
3302 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3303 {
3304         struct adapter *adapter = txr->adapter;
3305         struct e1000_adv_tx_context_desc *TXD;
3306         struct igb_tx_buffer        *tx_buffer;
3307         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3308         struct ether_vlan_header *eh;
3309         struct ip *ip = NULL;
3310         struct ip6_hdr *ip6;
3311         int  ehdrlen, ctxd, ip_hlen = 0;
3312         u16     etype, vtag = 0;
3313         u8      ipproto = 0;
3314         bool    offload = TRUE;
3315
3316         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3317                 offload = FALSE;
3318
3319         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3320         ctxd = txr->next_avail_desc;
3321         tx_buffer = &txr->tx_buffers[ctxd];
3322         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3323
3324         /*
3325         ** In advanced descriptors the vlan tag must 
3326         ** be placed into the context descriptor, thus
3327         ** we need to be here just for that setup.
3328         */
3329         if (mp->m_flags & M_VLANTAG) {
3330                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3331                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3332         } else if (offload == FALSE)
3333                 return FALSE;
3334
3335         /*
3336          * Determine where frame payload starts.
3337          * Jump over vlan headers if already present,
3338          * helpful for QinQ too.
3339          */
3340         eh = mtod(mp, struct ether_vlan_header *);
3341         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3342                 etype = ntohs(eh->evl_proto);
3343                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3344         } else {
3345                 etype = ntohs(eh->evl_encap_proto);
3346                 ehdrlen = ETHER_HDR_LEN;
3347         }
3348
3349         /* Set the ether header length */
3350         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3351
3352         switch (etype) {
3353                 case ETHERTYPE_IP:
3354                         ip = (struct ip *)(mp->m_data + ehdrlen);
3355                         ip_hlen = ip->ip_hl << 2;
3356                         if (mp->m_len < ehdrlen + ip_hlen) {
3357                                 offload = FALSE;
3358                                 break;
3359                         }
3360                         ipproto = ip->ip_p;
3361                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3362                         break;
3363                 case ETHERTYPE_IPV6:
3364                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3365                         ip_hlen = sizeof(struct ip6_hdr);
3366                         if (mp->m_len < ehdrlen + ip_hlen)
3367                                 return (FALSE);
3368                         ipproto = ip6->ip6_nxt;
3369                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3370                         break;
3371                 default:
3372                         offload = FALSE;
3373                         break;
3374         }
3375
3376         vlan_macip_lens |= ip_hlen;
3377         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3378
3379         switch (ipproto) {
3380                 case IPPROTO_TCP:
3381                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3382                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3383                         break;
3384                 case IPPROTO_UDP:
3385                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3386                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3387                         break;
3388 #if __FreeBSD_version >= 800000
3389                 case IPPROTO_SCTP:
3390                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3391                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3392                         break;
3393 #endif
3394                 default:
3395                         offload = FALSE;
3396                         break;
3397         }
3398
3399         /* 82575 needs the queue index added */
3400         if (adapter->hw.mac.type == e1000_82575)
3401                 mss_l4len_idx = txr->me << 4;
3402
3403         /* Now copy bits into descriptor */
3404         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3405         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3406         TXD->seqnum_seed = htole32(0);
3407         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3408
3409         tx_buffer->m_head = NULL;
3410         tx_buffer->next_eop = -1;
3411
3412         /* We've consumed the first desc, adjust counters */
3413         if (++ctxd == adapter->num_tx_desc)
3414                 ctxd = 0;
3415         txr->next_avail_desc = ctxd;
3416         --txr->tx_avail;
3417
3418         return (offload);
3419 }
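
/*
 * Editor's note: in the advanced context descriptor above,
 * vlan_macip_lens packs three fields: the IP header length in the
 * low bits, the MAC header length at E1000_ADVTXD_MACLEN_SHIFT and
 * the VLAN tag at E1000_ADVTXD_VLAN_SHIFT.  A hedged fragment for an
 * untagged IPv4 frame without options (values illustrative):
 */
#if 0
        u32 vml = 0;

        vml |= 20;                              /* IPLEN: 20-byte IPv4 header */
        vml |= ETHER_HDR_LEN << E1000_ADVTXD_MACLEN_SHIFT; /* MACLEN: 14 */
        /* no M_VLANTAG, so the VLAN field stays zero */
#endif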
3420
3421
3422 /**********************************************************************
3423  *
3424  *  Examine each tx_buffer in the used queue. If the hardware is done
3425  *  processing the packet then free associated resources. The
3426  *  tx_buffer is put back on the free queue.
3427  *
3428  *  A TRUE return means there is more in the ring to clean; FALSE means it is empty.
3429  **********************************************************************/
3430 static bool
3431 igb_txeof(struct tx_ring *txr)
3432 {
3433         struct adapter  *adapter = txr->adapter;
3434         int first, last, done;
3435         struct igb_tx_buffer *tx_buffer;
3436         struct e1000_tx_desc   *tx_desc, *eop_desc;
3437         struct ifnet   *ifp = adapter->ifp;
3438
3439         IGB_TX_LOCK_ASSERT(txr);
3440
3441         if (txr->tx_avail == adapter->num_tx_desc)
3442                 return FALSE;
3443
3444         first = txr->next_to_clean;
3445         tx_desc = &txr->tx_base[first];
3446         tx_buffer = &txr->tx_buffers[first];
3447         last = tx_buffer->next_eop;
3448         eop_desc = &txr->tx_base[last];
3449
3450         /*
3451          * Advance 'last' to the index of the first
3452          * descriptor AFTER the EOP of the first packet,
3453          * so the inner while loop can use a simple index
3454          * comparison; see the sketch after this function.
3455          */
3456         if (++last == adapter->num_tx_desc)
3457                 last = 0;
3458         done = last;
3459
3460         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3461             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3462
3463         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3464                 /* We clean the range of the packet */
3465                 while (first != done) {
3466                         tx_desc->upper.data = 0;
3467                         tx_desc->lower.data = 0;
3468                         tx_desc->buffer_addr = 0;
3469                         ++txr->tx_avail;
3470
3471                         if (tx_buffer->m_head) {
3472                                 txr->bytes +=
3473                                     tx_buffer->m_head->m_pkthdr.len;
3474                                 bus_dmamap_sync(txr->txtag,
3475                                     tx_buffer->map,
3476                                     BUS_DMASYNC_POSTWRITE);
3477                                 bus_dmamap_unload(txr->txtag,
3478                                     tx_buffer->map);
3479
3480                                 m_freem(tx_buffer->m_head);
3481                                 tx_buffer->m_head = NULL;
3482                         }
3483                         tx_buffer->next_eop = -1;
3484                         txr->watchdog_time = ticks;
3485
3486                         if (++first == adapter->num_tx_desc)
3487                                 first = 0;
3488
3489                         tx_buffer = &txr->tx_buffers[first];
3490                         tx_desc = &txr->tx_base[first];
3491                 }
3492                 ++txr->packets;
3493                 ++ifp->if_opackets;
3494                 /* See if we can continue to the next packet */
3495                 last = tx_buffer->next_eop;
3496                 if (last != -1) {
3497                         eop_desc = &txr->tx_base[last];
3498                         /* Get new done point */
3499                         if (++last == adapter->num_tx_desc) last = 0;
3500                         done = last;
3501                 } else
3502                         break;
3503         }
3504         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3505             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3506
3507         txr->next_to_clean = first;
3508
3509         /*
3510          * If we have enough room, clear IFF_OACTIVE
3511          * to tell the stack that it is OK to send packets.
3512          */
3513         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3514                 ifp->if_flags &= ~IFF_OACTIVE;
3515                 /* All clean, turn off the watchdog */
3516                 if (txr->tx_avail == adapter->num_tx_desc) {
3517                         txr->watchdog_check = FALSE;
3518                         return FALSE;
3519                 }
3520         }
3521
3522         return (TRUE);
3523 }
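
/*
 * Editor's note: a minimal sketch of the circular index arithmetic
 * igb_txeof() relies on.  'done' is the slot just past the EOP
 * descriptor, so "clean until first == done" walks the packet's whole
 * descriptor range with a plain equality test.  The names here are
 * illustrative, not driver API.
 */
#if 0
static inline int
ex_next_slot(int slot, int nslots)
{
        if (++slot == nslots)
                slot = 0;               /* wrap around the ring */
        return (slot);
}

/* Cleaning a packet occupying slots first .. eop (inclusive):       */
/*   for (done = ex_next_slot(eop, n); first != done;                */
/*        first = ex_next_slot(first, n))                            */
/*           release(first);                                         */
#endif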
3524
3525
3526 /*********************************************************************
3527  *
3528  *  Setup descriptor buffer(s) from system mbuf buffer pools.
3529  *              i - the descriptor index within the ring
3530  *              clean - tells the function whether to update
3531  *                      the header, the packet buffer, or both.
3532  *
3533  **********************************************************************/
3534 static int
3535 igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3536 {
3537         struct adapter          *adapter = rxr->adapter;
3538         struct igb_rx_buf       *rxbuf;
3539         struct mbuf             *mh, *mp;
3540         bus_dma_segment_t       hseg[1];
3541         bus_dma_segment_t       pseg[1];
3542         bus_dmamap_t            map;
3543         int                     nsegs, error;
3544
3545
3546         rxbuf = &rxr->rx_buffers[i];
3547         mh = mp = NULL;
3548         if ((clean & IGB_CLEAN_HEADER) != 0) {
3549                 mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3550                 if (mh == NULL) {
3551                         adapter->mbuf_header_failed++;          
3552                         return (ENOBUFS);
3553                 }
3554                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3555                 /*
3556                  * Because IGB_HDR_BUF is smaller than MHLEN
3557                  * and the controller is configured to split headers,
3558                  * the mbuf can be aligned on an ETHER_ALIGN boundary.
3559                  */
3560                 m_adj(mh, ETHER_ALIGN);
3561                 error = bus_dmamap_load_mbuf_segment(rxr->rx_htag,
3562                     rxr->rx_hspare_map, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3563                 if (error != 0) {
3564                         m_freem(mh);
3565                         return (error);
3566                 }
3567                 mh->m_flags &= ~M_PKTHDR;
3568         }
3569         if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3570                 mp = m_getl(adapter->rx_mbuf_sz,
3571                     MB_DONTWAIT, MT_DATA, M_PKTHDR, NULL);
3572 #if 0
3573                 mp = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR,
3574                     adapter->rx_mbuf_sz);
3575 #endif
3576                 if (mp == NULL) {
3577                         if (mh != NULL) {
3578                                 adapter->mbuf_packet_failed++;          
3579                                 bus_dmamap_unload(rxr->rx_htag,
3580                                     rxbuf->head_map);
3581                                 mh->m_flags |= M_PKTHDR;
3582                                 m_freem(mh);
3583                         }
3584                         return (ENOBUFS);
3585                 }
3586                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3587                 error = bus_dmamap_load_mbuf_segment(rxr->rx_ptag,
3588                     rxr->rx_pspare_map, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3589                 if (error != 0) {
3590                         if (mh != NULL) {
3591                                 bus_dmamap_unload(rxr->rx_htag,
3592                                     rxbuf->head_map);
3593                                 mh->m_flags |= M_PKTHDR;
3594                                 m_freem(mh);
3595                         }
3596                         m_freem(mp);
3597                         return (error);
3598                 }
3599                 mp->m_flags &= ~M_PKTHDR;
3600         }
3601
3602         /* New DMA maps are loaded; unload the maps of the received buffers. */
3603         if ((clean & IGB_CLEAN_HEADER) != 0 && rxbuf->m_head != NULL) {
3604                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3605                     BUS_DMASYNC_POSTREAD);
3606                 bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3607         }
3608         if ((clean & IGB_CLEAN_PAYLOAD) != 0 && rxbuf->m_pack != NULL) {
3609                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3610                     BUS_DMASYNC_POSTREAD);
3611                 bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3612         }
3613
3614         /* Swap the newly loaded dmamaps into place (sketch below). */
3615         if ((clean & IGB_CLEAN_HEADER) != 0) {
3616                 map = rxbuf->head_map;
3617                 rxbuf->head_map = rxr->rx_hspare_map;
3618                 rxr->rx_hspare_map = map;
3619                 rxbuf->m_head = mh;
3620                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3621                     BUS_DMASYNC_PREREAD);
3622                 rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
3623         }
3624         if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3625                 map = rxbuf->pack_map;
3626                 rxbuf->pack_map = rxr->rx_pspare_map;
3627                 rxr->rx_pspare_map = map;
3628                 rxbuf->m_pack = mp;
3629                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3630                     BUS_DMASYNC_PREREAD);
3631                 rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
3632         }
3633
3634         return (0);
3635 }
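
/*
 * Editor's note: igb_get_buf() loads each new mbuf into a per-ring
 * spare map first and only then swaps that map into the rx_buffer
 * slot, so a load failure never disturbs the map that still covers
 * the previously received buffer.  A hedged fragment showing the
 * swap idiom for the header map (for illustration only):
 */
#if 0
        bus_dmamap_t tmp;

        /* 1. load the new mbuf into the spare map (may fail safely)  */
        /* 2. unload the old buffer's map                             */
        /* 3. swap: the loaded spare becomes the slot's map, and the  */
        /*    old map becomes the spare for the next replenish        */
        tmp = rxbuf->head_map;
        rxbuf->head_map = rxr->rx_hspare_map;
        rxr->rx_hspare_map = tmp;
#endif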
3636
3637 /*********************************************************************
3638  *
3639  *  Allocate memory for rx_buffer structures. Since we use one
3640  *  rx_buffer per received packet, the maximum number of rx_buffers
3641  *  that we'll need is equal to the number of receive descriptors
3642  *  that we've allocated.
3643  *
3644  **********************************************************************/
3645 static int
3646 igb_allocate_receive_buffers(struct rx_ring *rxr)
3647 {
3648         struct  adapter         *adapter = rxr->adapter;
3649         device_t                dev = adapter->dev;
3650         struct igb_rx_buf       *rxbuf;
3651         int                     i, bsize, error;
3652
3653         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3654         if (!(rxr->rx_buffers =
3655             (struct igb_rx_buf *) kmalloc(bsize,
3656             M_DEVBUF, M_INTWAIT | M_ZERO))) {
3657                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3658                 error = ENOMEM;
3659                 goto fail;
3660         }
3661
3662         if ((error = bus_dma_tag_create(NULL,
3663                                    1, 0,                /* alignment, bounds */
3664                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3665                                    BUS_SPACE_MAXADDR,   /* highaddr */
3666                                    NULL, NULL,          /* filter, filterarg */
3667                                    MSIZE,               /* maxsize */
3668                                    1,                   /* nsegments */
3669                                    MSIZE,               /* maxsegsize */
3670                                    0,                   /* flags */
3671                                    &rxr->rx_htag))) {
3672                 device_printf(dev, "Unable to create RX DMA tag\n");
3673                 goto fail;
3674         }
3675
3676         if ((error = bus_dma_tag_create(NULL,
3677                                    1, 0,                /* alignment, bounds */
3678                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3679                                    BUS_SPACE_MAXADDR,   /* highaddr */
3680                                    NULL, NULL,          /* filter, filterarg */
3681                                    MJUMPAGESIZE,        /* maxsize */
3682                                    1,                   /* nsegments */
3683                                    MJUMPAGESIZE,        /* maxsegsize */
3684                                    0,                   /* flags */
3685                                    &rxr->rx_ptag))) {
3686                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3687                 goto fail;
3688         }
3689
3690         /* Create the spare maps (used by igb_get_buf) */
3691         error = bus_dmamap_create(rxr->rx_htag, BUS_DMA_NOWAIT,
3692              &rxr->rx_hspare_map);
3693         if (error) {
3694                 device_printf(dev,
3695                     "%s: bus_dmamap_create header spare failed: %d\n",
3696                     __func__, error);
3697                 goto fail;
3698         }
3699         error = bus_dmamap_create(rxr->rx_ptag, BUS_DMA_NOWAIT,
3700              &rxr->rx_pspare_map);
3701         if (error) {
3702                 device_printf(dev,
3703                     "%s: bus_dmamap_create packet spare failed: %d\n",
3704                     __func__, error);
3705                 goto fail;
3706         }
3707
3708         for (i = 0; i < adapter->num_rx_desc; i++) {
3709                 rxbuf = &rxr->rx_buffers[i];
3710                 error = bus_dmamap_create(rxr->rx_htag,
3711                     BUS_DMA_NOWAIT, &rxbuf->head_map);
3712                 if (error) {
3713                         device_printf(dev,
3714                             "Unable to create RX head DMA maps\n");
3715                         goto fail;
3716                 }
3717                 error = bus_dmamap_create(rxr->rx_ptag,
3718                     BUS_DMA_NOWAIT, &rxbuf->pack_map);
3719                 if (error) {
3720                         device_printf(dev,
3721                             "Unable to create RX packet DMA maps\n");
3722                         goto fail;
3723                 }
3724         }
3725
3726         return (0);
3727
3728 fail:
3729         /* Frees everything, and can handle a partially completed setup */
3730         igb_free_receive_structures(adapter);
3731         return (error);
3732 }
3733
3734
3735 static void
3736 igb_free_receive_ring(struct rx_ring *rxr)
3737 {
3738         struct  adapter         *adapter;
3739         struct igb_rx_buf       *rxbuf;
3740         int i;
3741
3742         adapter = rxr->adapter;
3743         for (i = 0; i < adapter->num_rx_desc; i++) {
3744                 rxbuf = &rxr->rx_buffers[i];
3745                 if (rxbuf->m_head != NULL) {
3746                         bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3747                             BUS_DMASYNC_POSTREAD);
3748                         bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3749                         rxbuf->m_head->m_flags |= M_PKTHDR;
3750                         m_freem(rxbuf->m_head);
3751                 }
3752                 if (rxbuf->m_pack != NULL) {
3753                         bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3754                             BUS_DMASYNC_POSTREAD);
3755                         bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3756                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3757                         m_freem(rxbuf->m_pack);
3758                 }
3759                 rxbuf->m_head = NULL;
3760                 rxbuf->m_pack = NULL;
3761         }
3762 }
3763
3764
3765 /*********************************************************************
3766  *
3767  *  Initialize a receive ring and its buffers.
3768  *
3769  **********************************************************************/
3770 static int
3771 igb_setup_receive_ring(struct rx_ring *rxr)
3772 {
3773         struct  adapter         *adapter;
3774         struct  ifnet           *ifp;
3775         device_t                dev;
3776 #ifdef NET_LRO 
3777         struct lro_ctrl         *lro = &rxr->lro;
3778 #endif
3779         int                     j, rsize, error = 0;
3780
3781         adapter = rxr->adapter;
3782         dev = adapter->dev;
3783         ifp = adapter->ifp;
3784
3785         /* Clear the ring contents */
3786         IGB_RX_LOCK(rxr);
3787         rsize = roundup2(adapter->num_rx_desc *
3788             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3789         bzero((void *)rxr->rx_base, rsize);
3790
3791         /*
3792         ** Free current RX buffer structures and their mbufs
3793         */
3794         igb_free_receive_ring(rxr);
3795
3796         /* Now replenish the ring mbufs */
3797         for (j = 0; j < adapter->num_rx_desc; j++) {
3798                 if ((error = igb_get_buf(rxr, j, IGB_CLEAN_BOTH)) != 0)
3799                         goto fail;
3800         }
3801
3802         /* Setup our descriptor indices */
3803         rxr->next_to_check = 0;
3804         rxr->last_cleaned = 0;
3805         rxr->lro_enabled = FALSE;
3806
3807         if (igb_header_split)
3808                 rxr->hdr_split = TRUE;
3809 #ifdef NET_LRO
3810         else
3811                 ifp->if_capabilities &= ~IFCAP_LRO;
3812 #endif
3813
3814         rxr->fmp = NULL;
3815         rxr->lmp = NULL;
3816         rxr->discard = FALSE;
3817
3818         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3819             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3820
3821         /*
3822         ** Now set up the LRO interface; header
3823         ** split is only done when LRO is enabled,
3824         ** since it is otherwise so often
3825         ** undesirable in similar setups.
3826         */
3827 #ifdef NET_LRO
3828         if (ifp->if_capenable & IFCAP_LRO) {
3829                 int err = tcp_lro_init(lro);
3830                 if (err) {
3831                         device_printf(dev, "LRO Initialization failed!\n");
3832                         goto fail;
3833                 }
3834                 INIT_DEBUGOUT("RX LRO Initialized\n");
3835                 rxr->lro_enabled = TRUE;
3836                 lro->ifp = adapter->ifp;
3837         }
3838 #endif
3839
3840         IGB_RX_UNLOCK(rxr);
3841         return (0);
3842
3843 fail:
3844         igb_free_receive_ring(rxr);
3845         IGB_RX_UNLOCK(rxr);
3846         return (error);
3847 }
3848
3849 /*********************************************************************
3850  *
3851  *  Initialize all receive rings.
3852  *
3853  **********************************************************************/
3854 static int
3855 igb_setup_receive_structures(struct adapter *adapter)
3856 {
3857         struct rx_ring *rxr = adapter->rx_rings;
3858         int i;
3859
3860         for (i = 0; i < adapter->num_queues; i++, rxr++)
3861                 if (igb_setup_receive_ring(rxr))
3862                         goto fail;
3863
3864         return (0);
3865 fail:
3866         /*
3867          * Free the RX buffers allocated so far; only the rings
3868          * that completed need handling here, as the failing ring
3869          * cleaned up after itself. The value of 'i' is the index
3870          * of the failed ring, so rings 0 .. i-1 are unwound, once each.
3871          */
3872         rxr = adapter->rx_rings;
3873         for (--i; i >= 0; i--, rxr++)
3874                 igb_free_receive_ring(rxr);
3877
3878         return (ENOBUFS);
3879 }
3880
3881 /*********************************************************************
3882  *
3883  *  Enable receive unit.
3884  *
3885  **********************************************************************/
3886 static void
3887 igb_initialize_receive_units(struct adapter *adapter)
3888 {
3889         struct rx_ring  *rxr = adapter->rx_rings;
3890         struct ifnet    *ifp = adapter->ifp;
3891         struct e1000_hw *hw = &adapter->hw;
3892         u32             rctl, rxcsum, psize, srrctl = 0;
3893
3894         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3895
3896         /*
3897          * Make sure receives are disabled while setting
3898          * up the descriptor ring
3899          */
3900         rctl = E1000_READ_REG(hw, E1000_RCTL);
3901         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3902
3903         /*
3904         ** Set up for header split
3905         */
3906         if (rxr->hdr_split) {
3907                 /* Use a standard mbuf for the header */
3908                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3909                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3910         } else
3911                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3912
3913         /*
3914         ** Set up for jumbo frames
3915         */
3916         if (ifp->if_mtu > ETHERMTU) {
3917                 rctl |= E1000_RCTL_LPE;
3918                 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3919                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3920
3921                 /* Set maximum packet len */
3922                 psize = adapter->max_frame_size;
3923                 /* are we on a vlan? */
3924                 if (adapter->ifp->if_vlantrunks != NULL)
3925                         psize += VLAN_TAG_SIZE;
3926                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3927         } else {
3928                 rctl &= ~E1000_RCTL_LPE;
3929                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3930                 rctl |= E1000_RCTL_SZ_2048;
3931         }
3932
3933         /* Setup the Base and Length of the Rx Descriptor Rings */
3934         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3935                 u64 bus_addr = rxr->rxdma.dma_paddr;
3936                 u32 rxdctl;
3937
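                /*
                 * Note: sizeof(struct e1000_rx_desc) equals
                 * sizeof(union e1000_adv_rx_desc) (both 16 bytes), so
                 * this RDLEN matches the memory allocated above with
                 * the advanced descriptor type.
                 */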
3938                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3939                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3940                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3941                     (uint32_t)(bus_addr >> 32));
3942                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3943                     (uint32_t)bus_addr);
3944                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3945                 /* Enable this Queue */
3946                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3947                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3948                 rxdctl &= 0xFFF00000;
3949                 rxdctl |= IGB_RX_PTHRESH;
3950                 rxdctl |= IGB_RX_HTHRESH << 8;
3951                 rxdctl |= IGB_RX_WTHRESH << 16;
3952                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3953         }
3954
3955         /*
3956         ** Setup for RX MultiQueue
3957         */
3958         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3959         if (adapter->num_queues > 1) {
3960                 u32 random[10], mrqc, shift = 0;
3961                 union igb_reta {
3962                         u32 dword;
3963                         u8  bytes[4];
3964                 } reta;
3965
3966                 karc4rand(&random, sizeof(random));
3967                 if (adapter->hw.mac.type == e1000_82575)
3968                         shift = 6;
3969                 /* Populate the 128-entry redirection table (see note below) */
3970                 for (int i = 0; i < 128; i++) {
3971                         reta.bytes[i & 3] =
3972                             (i % adapter->num_queues) << shift;
3973                         if ((i & 3) == 3)
3974                                 E1000_WRITE_REG(hw,
3975                                     E1000_RETA(i >> 2), reta.dword);
3976                 }
3977                 /* Enable RSS and fill in the random hash key */
3978                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3979                 for (int i = 0; i < 10; i++)
3980                         E1000_WRITE_REG_ARRAY(hw,
3981                             E1000_RSSRK(0), i, random[i]);
3982
3983                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3984                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
3985                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3986                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
3987                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3988                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
3989                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3990                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3991
3992                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
3993
3994                 /*
3995                 ** NOTE: Receive Full-Packet Checksum Offload 
3996                 ** is mutually exclusive with Multiqueue. However,
3997                 ** this is not the same as TCP/IP checksums, which
3998                 ** still work.
3999                 */
4000                 rxcsum |= E1000_RXCSUM_PCSD;
4001 #if __FreeBSD_version >= 800000
4002                 /* For SCTP Offload */
4003                 if ((hw->mac.type == e1000_82576)
4004                     && (ifp->if_capenable & IFCAP_RXCSUM))
4005                         rxcsum |= E1000_RXCSUM_CRCOFL;
4006 #endif
4007         } else {
4008                 /* Non RSS setup */
4009                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4010                         rxcsum |= E1000_RXCSUM_IPPCSE;
4011 #if __FreeBSD_version >= 800000
4012                         if (adapter->hw.mac.type == e1000_82576)
4013                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4014 #endif
4015                 } else
4016                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4017         }
4018         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4019
4020         /* Setup the Receive Control Register */
4021         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4022         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4023                    E1000_RCTL_RDMTS_HALF |
4024                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4025         /* Strip CRC bytes. */
4026         rctl |= E1000_RCTL_SECRC;
4027         /* Make sure VLAN Filters are off */
4028         rctl &= ~E1000_RCTL_VFE;
4029         /* Don't store bad packets */
4030         rctl &= ~E1000_RCTL_SBP;
4031
4032         /* Enable Receives */
4033         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4034
4035         /*
4036          * Setup the HW Rx Head and Tail Descriptor Pointers
4037          *   - needs to be after enable
4038          */
4039         for (int i = 0; i < adapter->num_queues; i++) {
4040                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4041                 E1000_WRITE_REG(hw, E1000_RDT(i),
4042                      adapter->num_rx_desc - 1);
4043         }
4044         return;
4045 }
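
/*
 * Editor's note on the RETA loop above: 128 one-byte redirection
 * entries are packed four at a time into 32 dword registers, so
 * E1000_RETA(i >> 2) is written on every fourth iteration, once a
 * full dword has been gathered.  The shift of 6 places the queue
 * index in the bit positions the 82575 expects within each entry;
 * later MACs take it in the low bits, hence shift = 0.
 */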
4046
4047 /*********************************************************************
4048  *
4049  *  Free receive rings.
4050  *
4051  **********************************************************************/
4052 static void
4053 igb_free_receive_structures(struct adapter *adapter)
4054 {
4055         struct rx_ring *rxr = adapter->rx_rings;
4056
4057         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4058 #ifdef NET_LRO 
4059                 struct lro_ctrl *lro = &rxr->lro;
4060 #endif
4061                 IGB_RX_LOCK(rxr);
4062                 igb_free_receive_buffers(rxr);
4063 #ifdef NET_LRO
4064                 tcp_lro_free(lro);
4065 #endif
4066                 igb_dma_free(adapter, &rxr->rxdma);
4067                 IGB_RX_UNLOCK(rxr);
4068                 IGB_RX_LOCK_DESTROY(rxr);
4069         }
4070
4071         kfree(adapter->rx_rings, M_DEVBUF);
4072 }
4073
4074 /*********************************************************************
4075  *
4076  *  Free receive ring data structures.
4077  *
4078  **********************************************************************/
4079 static void
4080 igb_free_receive_buffers(struct rx_ring *rxr)
4081 {
4082         struct adapter          *adapter = rxr->adapter;
4083         struct igb_rx_buf       *rxbuf;
4084         int i;
4085
4086         INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4087
4088         if (rxr->rx_hspare_map != NULL) {
4089                 bus_dmamap_destroy(rxr->rx_htag, rxr->rx_hspare_map);
4090                 rxr->rx_hspare_map = NULL;
4091         }
4092
4093         if (rxr->rx_pspare_map != NULL) {
4094                 bus_dmamap_destroy(rxr->rx_ptag, rxr->rx_pspare_map);
4095                 rxr->rx_pspare_map = NULL;
4096         }
4097
4098         /* Cleanup any existing buffers */
4099         if (rxr->rx_buffers != NULL) {
4100                 for (i = 0; i < adapter->num_rx_desc; i++) {
4101                         rxbuf = &rxr->rx_buffers[i];
4102                         if (rxbuf->m_head != NULL) {
4103                                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
4104                                     BUS_DMASYNC_POSTREAD);
4105                                 bus_dmamap_unload(rxr->rx_htag,
4106                                     rxbuf->head_map);
4107                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4108                                 m_freem(rxbuf->m_head);
4109                         }
4110                         if (rxbuf->m_pack != NULL) {
4111                                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
4112                                     BUS_DMASYNC_POSTREAD);
4113                                 bus_dmamap_unload(rxr->rx_ptag,
4114                                     rxbuf->pack_map);
4115                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4116                                 m_freem(rxbuf->m_pack);
4117                         }
4118                         rxbuf->m_head = NULL;
4119                         rxbuf->m_pack = NULL;
4120                         if (rxbuf->head_map != NULL) {
4121                                 bus_dmamap_destroy(rxr->rx_htag,
4122                                     rxbuf->head_map);
4123                                 rxbuf->head_map = NULL;
4124                         }
4125                         if (rxbuf->pack_map != NULL) {
4126                                 bus_dmamap_destroy(rxr->rx_ptag,
4127                                     rxbuf->pack_map);
4128                                 rxbuf->pack_map = NULL;
4129                         }
4130                 }
4131                 if (rxr->rx_buffers != NULL) {
4132                         kfree(rxr->rx_buffers, M_DEVBUF);
4133                         rxr->rx_buffers = NULL;
4134                 }
4135         }
4136
4137         if (rxr->rx_htag != NULL) {
4138                 bus_dma_tag_destroy(rxr->rx_htag);
4139                 rxr->rx_htag = NULL;
4140         }
4141         if (rxr->rx_ptag != NULL) {
4142                 bus_dma_tag_destroy(rxr->rx_ptag);
4143                 rxr->rx_ptag = NULL;
4144         }
4145 }
4146
4147 static __inline void
4148 igb_rx_discard(struct rx_ring *rxr, union e1000_adv_rx_desc *cur, int i)
4149 {
4150
4151         if (rxr->fmp != NULL) {
4152                 rxr->fmp->m_flags |= M_PKTHDR;
4153                 m_freem(rxr->fmp);
4154                 rxr->fmp = NULL;
4155                 rxr->lmp = NULL;
4156         }
4157 }
4158
4159 static __inline void
4160 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4161 {
4162
4163         /*
4164          * At the moment LRO is only applied to IPv4/TCP packets whose TCP
4165          * checksum has been verified by hardware, and which carry no VLAN
4166          * tag in the ethernet header.
4167          */
4168 #ifdef NET_LRO
4169         if (rxr->lro_enabled &&
4170             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4171             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4172             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4173             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4174             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4175             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4176                 /*
4177                  * Send to the stack if:
4178                  *  - LRO is not enabled, or
4179                  *  - there are no LRO resources, or
4180                  *  - the LRO enqueue fails.
4181                  */
4182                 if (rxr->lro.lro_cnt != 0)
4183                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4184                                 return;
4185         }
4186 #endif
4187         (*ifp->if_input)(ifp, m);
4188 }
4189
4190 /*********************************************************************
4191  *
4192  *  This routine executes in interrupt context. It replenishes
4193  *  the mbufs in the descriptor ring and sends data which has been
4194  *  dma'ed into host memory to the upper layer.
4195  *
4196  *  We loop at most count times if count is > 0, or until done if
4197  *  count < 0.
4198  *
4199  *  Return TRUE if more to clean, FALSE otherwise
4200  *********************************************************************/
4201 static bool
4202 igb_rxeof(struct rx_ring *rxr, int count)
4203 {
4204         struct adapter          *adapter = rxr->adapter;
4205         struct ifnet            *ifp = adapter->ifp;
4206 #ifdef NET_LRO
4207         struct lro_ctrl         *lro = &rxr->lro;
4208         struct lro_entry        *queued;
4209 #endif
4210         int                     i, prog = 0;
4211         u32                     ptype, staterr = 0;
4212         union e1000_adv_rx_desc *cur;
4213
4214         IGB_RX_LOCK(rxr);
4215
4216         /* Main clean loop */
4217         for (i = rxr->next_to_check; count > 0; prog++) {
4218                 struct mbuf *sendmp, *mh, *mp;
4219                 u16 hlen, plen, hdr, vtag;
4220                 bool eop = FALSE;
4221                 u8 dopayload;
4222  
4223                 /* Sync the ring. */
4224                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4225                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4226                 cur = &rxr->rx_base[i];
4227                 staterr = le32toh(cur->wb.upper.status_error);
4228                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4229                         break;
4230                 if ((ifp->if_flags & IFF_RUNNING) == 0)
4231                         break;
4232                 count--;
4233                 sendmp = mh = mp = NULL;
4234                 cur->wb.upper.status_error = 0;
4235                 plen = le16toh(cur->wb.upper.length);
4236                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4237                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4238                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4239
4240                 /* Make sure all segments of a bad packet are discarded */
4241                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4242                     (rxr->discard)) {
4243                         ifp->if_ierrors++;
4244                         ++rxr->rx_discarded;
4245                         if (!eop) /* Catch subsequent segs */
4246                                 rxr->discard = TRUE;
4247                         else
4248                                 rxr->discard = FALSE;
4249                         igb_rx_discard(rxr, cur, i);
4250                         goto next_desc;
4251                 }
4252
4253                 /*
4254                 ** The hardware uses the header buffer ONLY
4255                 ** when header split is enabled; otherwise we
4256                 ** get the normal behavior, i.e. both header
4257                 ** and payload are DMA'd into the payload
4258                 ** buffer.
4259                 **
4260                 ** The fmp test catches the case where a
4261                 ** packet spans multiple descriptors; only
4262                 ** the first descriptor's header is valid.
4263                 */
4264                 if (rxr->hdr_split && rxr->fmp == NULL) {
4265                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4266                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4267                         if (hlen > IGB_HDR_BUF)
4268                                 hlen = IGB_HDR_BUF;
4269                         /* Handle the header mbuf */
4270                         mh = rxr->rx_buffers[i].m_head;
4271                         mh->m_len = hlen;
4272                         dopayload = IGB_CLEAN_HEADER;
4273                         /*
4274                         ** Get the payload length; this
4275                         ** could be zero if it's a small
4276                         ** packet.
4277                         */
4278                         if (plen > 0) {
4279                                 mp = rxr->rx_buffers[i].m_pack;
4280                                 mp->m_len = plen;
4281                                 mh->m_next = mp;
4282                                 dopayload = IGB_CLEAN_BOTH;
4283                                 rxr->rx_split_packets++;
4284                         }
4285                 } else {
4286                         /*
4287                         ** Either no header split, or a
4288                         ** secondary piece of a fragmented
4289                         ** split packet.
4290                         */
4291                         mh = rxr->rx_buffers[i].m_pack;
4292                         mh->m_len = plen;
4293                         dopayload = IGB_CLEAN_PAYLOAD;
4294                 }
4295
4296                 /*
4297                 ** get_buf will overwrite the writeback
4298                 ** descriptor, so save the VLAN tag now.
4299                 */
4300                 vtag = le16toh(cur->wb.upper.vlan);
4301                 if (igb_get_buf(rxr, i, dopayload) != 0) {
4302                         ifp->if_iqdrops++;
4303                         /*
4304                          * We've dropped a frame due to lack of resources
4305                          * so we should drop entire multi-segmented
4306                          * frames until we encounter EOP.
4307                          */
4308                         if ((staterr & E1000_RXD_STAT_EOP) == 0)
4309                                 rxr->discard = TRUE;
4310                         igb_rx_discard(rxr, cur, i);
4311                         goto next_desc;
4312                 }
4313
4314                 /* Initial frame - setup */
4315                 if (rxr->fmp == NULL) {
4316                         mh->m_pkthdr.len = mh->m_len;
4317                         /* Store the first mbuf */
4318                         rxr->fmp = mh;
4319                         rxr->lmp = mh;
4320                         if (mp != NULL) {
4321                                 /* Add payload if split */
4322                                 mh->m_pkthdr.len += mp->m_len;
4323                                 rxr->lmp = mh->m_next;
4324                         }
4325                 } else {
4326                         /* Chain mbuf's together */
4327                         rxr->lmp->m_next = mh;
4328                         rxr->lmp = rxr->lmp->m_next;
4329                         rxr->fmp->m_pkthdr.len += mh->m_len;
4330                 }
4331
4332                 if (eop) {
4333                         rxr->fmp->m_pkthdr.rcvif = ifp;
4334                         ifp->if_ipackets++;
4335                         rxr->rx_packets++;
4336                         /* capture data for AIM */
4337                         rxr->packets++;
4338                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4339                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4340
4341                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4342                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4343                         /* XXX igb(4) always strips VLAN. */
4344                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4345                             (staterr & E1000_RXD_STAT_VP) != 0) {
4346                                 rxr->fmp->m_pkthdr.ether_vlantag = vtag;
4347                                 rxr->fmp->m_flags |= M_VLANTAG;
4348                         }
4349 #if __FreeBSD_version >= 800000
4350                         rxr->fmp->m_pkthdr.flowid = curcpu;
4351                         rxr->fmp->m_flags |= M_FLOWID;
4352 #endif
4353                         sendmp = rxr->fmp;
4354                         /* Make sure to set M_PKTHDR. */
4355                         sendmp->m_flags |= M_PKTHDR;
4356                         rxr->fmp = NULL;
4357                         rxr->lmp = NULL;
4358                 }
4359
4360 next_desc:
4361                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4362                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4363
4364                 rxr->last_cleaned = i; /* For updating tail */
4365
4366                 /* Advance our pointers to the next descriptor. */
4367                 if (++i == adapter->num_rx_desc)
4368                         i = 0;
4369  
4370                 /*
4371                 ** Note that we hold the RX lock through
4372                 ** the following call, so this ring's
4373                 ** next_to_check cannot change.
4374                 */
4375                 if (sendmp != NULL)
4376                         igb_rx_input(rxr, ifp, sendmp, ptype);
4377         }
4378
4379         if (prog == 0) {
4380                 IGB_RX_UNLOCK(rxr);
4381                 return (FALSE);
4382         }
4383
4384         rxr->next_to_check = i;
4385
4386         /* Advance the E1000's Receive Queue "Tail Pointer". */
4387         E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4388
4389         /*
4390          * Flush any outstanding LRO work
4391          */
4392 #ifdef NET_LRO
4393         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4394                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4395                 tcp_lro_flush(lro, queued);
4396         }
4397 #endif
4398
4399         IGB_RX_UNLOCK(rxr);
4400
4401         /*
4402         ** If there is still cleaning to do,
4403         ** schedule another interrupt.
4404         */
4405         if ((staterr & E1000_RXD_STAT_DD) != 0)
4406                 return (TRUE);
4407
4408         return (FALSE);
4409 }
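/*
 * Editorial sketch (not part of the driver): igb_rxeof() returns TRUE
 * while descriptors remain, so a budgeted caller -- e.g. a polling
 * handler -- can drain a ring in bounded passes.  The helper name and
 * budget below are illustrative assumptions.
 */
#if 0
static void
example_drain_ring(struct rx_ring *rxr)
{
	int budget = 64;		/* descriptors cleaned per pass */

	while (igb_rxeof(rxr, budget))
		;			/* more work pending, go again */
}
#endif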
4410
4411 /*********************************************************************
4412  *
4413  *  Verify that the hardware indicated that the checksum is valid.
4414  *  Inform the stack of the checksum status so that it does not
4415  *  spend time verifying the checksum again.
4416  *
4417  *********************************************************************/
4418 static void
4419 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4420 {
4421         u16 status = (u16)staterr;
4422         u8  errors = (u8) (staterr >> 24);
4423         int sctp;
4424
4425         /* The Ignore Checksum bit is set; report nothing */
4426         if (status & E1000_RXD_STAT_IXSM) {
4427                 mp->m_pkthdr.csum_flags = 0;
4428                 return;
4429         }
4430
4431         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4432             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4433                 sctp = 1;
4434         else
4435                 sctp = 0;
4436         if (status & E1000_RXD_STAT_IPCS) {
4437                 /* Did it pass? */
4438                 if (!(errors & E1000_RXD_ERR_IPE)) {
4439                         /* IP Checksum Good */
4440                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4441                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4442                 } else
4443                         mp->m_pkthdr.csum_flags = 0;
4444         }
4445
4446         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4447                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4448 #if __FreeBSD_version >= 800000
4449                 if (sctp) /* reassign */
4450                         type = CSUM_SCTP_VALID;
4451 #endif
4452                 /* Did it pass? */
4453                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4454                         mp->m_pkthdr.csum_flags |= type;
4455                         if (sctp == 0)
4456                                 mp->m_pkthdr.csum_data = htons(0xffff);
4457                 }
4458         }
4459         return;
4460 }
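/*
 * Editorial sketch: how an upper layer consumes the flags set above.
 * When CSUM_DATA_VALID and CSUM_PSEUDO_HDR are both present and
 * csum_data holds 0xffff, the TCP/UDP checksum needs no software
 * pass.  The helper name is an illustrative assumption.
 */
#if 0
static int
example_l4_csum_ok(const struct mbuf *m)
{
	return ((m->m_pkthdr.csum_flags &
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR) &&
	    m->m_pkthdr.csum_data == 0xffff);
}
#endif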
4461
4462 /*
4463  * This routine is run via a vlan
4464  * config EVENT.
4465  */
4466 static void
4467 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4468 {
4469         struct adapter  *adapter = ifp->if_softc;
4470         u32             index, bit;
4471
4472         if (ifp->if_softc != arg)   /* Not our event */
4473                 return;
4474
4475         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4476                 return;
4477
4478         index = (vtag >> 5) & 0x7F;
4479         bit = vtag & 0x1F;
4480         igb_shadow_vfta[index] |= (1 << bit);
4481         ++adapter->num_vlans;
4482         /* Re-init to load the changes */
4483         igb_init(adapter);
4484 }
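/*
 * Worked example of the VFTA mapping above: vtag 1000 selects dword
 * index (1000 >> 5) & 0x7F = 31 and bit 1000 & 0x1F = 8, so the tag
 * occupies bit 8 of igb_shadow_vfta[31].
 */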
4485
4486 /*
4487  * This routine is run via a vlan
4488  * unconfig EVENT.
4489  */
4490 static void
4491 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4492 {
4493         struct adapter  *adapter = ifp->if_softc;
4494         u32             index, bit;
4495
4496         if (ifp->if_softc != arg)
4497                 return;
4498
4499         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4500                 return;
4501
4502         index = (vtag >> 5) & 0x7F;
4503         bit = vtag & 0x1F;
4504         igb_shadow_vfta[index] &= ~(1 << bit);
4505         --adapter->num_vlans;
4506         /* Re-init to load the changes */
4507         igb_init(adapter);
4508 }
4509
4510 static void
4511 igb_setup_vlan_hw_support(struct adapter *adapter)
4512 {
4513         struct e1000_hw *hw = &adapter->hw;
4514         u32             reg;
4515
4516         /*
4517         ** We get here through init_locked, meaning
4518         ** a soft reset, which has already cleared
4519         ** the VFTA and other state; so if no vlans
4520         ** have been registered, do nothing.
4521         */
4522         if (adapter->num_vlans == 0)
4523                 return;
4524
4525         /*
4526         ** A soft reset zeroes out the VFTA, so
4527         ** we need to repopulate it now.
4528         */
4529         for (int i = 0; i < IGB_VFTA_SIZE; i++)
4530                 if (igb_shadow_vfta[i] != 0)
4531                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4532                             i, igb_shadow_vfta[i]);
4533
4534         reg = E1000_READ_REG(hw, E1000_CTRL);
4535         reg |= E1000_CTRL_VME;
4536         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4537
4538         /* Enable the Filter Table */
4539         reg = E1000_READ_REG(hw, E1000_RCTL);
4540         reg &= ~E1000_RCTL_CFIEN;
4541         reg |= E1000_RCTL_VFE;
4542         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4543
4544         /* Update the frame size */
4545         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4546             adapter->max_frame_size + VLAN_TAG_SIZE);
4547 }
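/*
 * Worked example of the RLPML update above: with a standard 1518-byte
 * maximum frame, the long-packet limit becomes 1518 + VLAN_TAG_SIZE
 * (4) = 1522 bytes, i.e. room for one 802.1Q tag.
 */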
4548
4549 static void
4550 igb_enable_intr(struct adapter *adapter)
4551 {
4552         /* With RSS/MSI-X, set up which interrupts to auto-clear */
4553         if (adapter->msix_mem) {
4554                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4555                     adapter->eims_mask);
4556                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4557                     adapter->eims_mask);
4558                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4559                     adapter->eims_mask);
4560                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4561                     E1000_IMS_LSC);
4562         } else {
4563                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4564                     IMS_ENABLE_MASK);
4565         }
4566         E1000_WRITE_FLUSH(&adapter->hw);
4567
4568         return;
4569 }
4570
4571 static void
4572 igb_disable_intr(struct adapter *adapter)
4573 {
4574         if (adapter->msix_mem) {
4575                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4576                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4577         } 
4578         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4579         E1000_WRITE_FLUSH(&adapter->hw);
4580         return;
4581 }
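/*
 * Editorial sketch: the usual pairing of the two helpers above around
 * a reconfiguration; "adapter" stands for whatever softc the caller
 * already holds.
 */
#if 0
	igb_disable_intr(adapter);	/* quiesce the hardware */
	/* ... safely rewrite rings/registers here ... */
	igb_enable_intr(adapter);	/* resume interrupt delivery */
#endif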
4582
4583 /*
4584  * Bit of a misnomer: what this really means is
4585  * to enable OS management of the system, i.e.
4586  * to disable the special hardware management features.
4587  */
4588 static void
4589 igb_init_manageability(struct adapter *adapter)
4590 {
4591         if (adapter->has_manage) {
4592                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4593                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4594
4595                 /* disable hardware interception of ARP */
4596                 manc &= ~(E1000_MANC_ARP_EN);
4597
4598                 /* enable receiving management packets to the host */
4599                 manc |= E1000_MANC_EN_MNG2HOST;
4600                 manc2h |= 1 << 5;  /* Mng Port 623 */
4601                 manc2h |= 1 << 6;  /* Mng Port 664 */
4602                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4603                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4604         }
4605 }
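/*
 * Editorial note: bits 5 and 6 of MANC2H set above correspond to the
 * RMCP management ports, 623 (ASF/RMCP) and 664 (secure RMCP), so
 * management traffic on those ports is forwarded up to the host.
 */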
4606
4607 /*
4608  * Give control back to hardware management
4609  * controller if there is one.
4610  */
4611 static void
4612 igb_release_manageability(struct adapter *adapter)
4613 {
4614         if (adapter->has_manage) {
4615                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4616
4617                 /* re-enable hardware interception of ARP */
4618                 manc |= E1000_MANC_ARP_EN;
4619                 manc &= ~E1000_MANC_EN_MNG2HOST;
4620
4621                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4622         }
4623 }
4624
4625 /*
4626  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4627  * For ASF and Pass Through versions of f/w this means that
4628  * the driver is loaded. 
4629  *
4630  */
4631 static void
4632 igb_get_hw_control(struct adapter *adapter)
4633 {
4634         u32 ctrl_ext;
4635
4636         /* Let firmware know the driver has taken over */
4637         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4638         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4639             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4640 }
4641
4642 /*
4643  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4644  * For ASF and Pass Through versions of f/w this means that the
4645  * driver is no longer loaded.
4646  *
4647  */
4648 static void
4649 igb_release_hw_control(struct adapter *adapter)
4650 {
4651         u32 ctrl_ext;
4652
4653         /* Let firmware take over control of the hardware */
4654         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4655         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4656             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4657 }
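/*
 * Editorial sketch: these two helpers bracket the driver's ownership
 * of the device -- DRV_LOAD set at attach/init, cleared at detach or
 * suspend -- so the management firmware knows when to stay out of the
 * way.  The placement below is illustrative.
 */
#if 0
	igb_get_hw_control(adapter);	/* attach/resume: driver owns NIC */
	/* ... normal operation ... */
	igb_release_hw_control(adapter);/* detach/suspend: f/w may manage */
#endif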
4658
4659 static int
4660 igb_is_valid_ether_addr(uint8_t *addr)
4661 {
4662         static const char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
4663
4664         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4665                 return (FALSE);
4666         }
4667
4668         return (TRUE);
4669 }
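/*
 * Worked example for the check above: the low-order bit of the first
 * octet is the multicast bit, so 01:00:5e:00:00:01 is rejected
 * (multicast) and 00:00:00:00:00:00 is rejected (all zero), while a
 * unicast address such as 52:54:00:12:34:56 passes.
 */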
4670
4671
4672 /*
4673  * Enable PCI Wake On Lan capability
4674  */
4675 void
4676 igb_enable_wakeup(device_t dev)
4677 {
4678         u16     cap, status;
4679         u8      id;
4680
4681         /* First find the capabilities pointer */
4682         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4683         /* Read the PM Capabilities */
4684         id = pci_read_config(dev, cap, 1);
4685         if (id != PCIY_PMG)     /* Something wrong */
4686                 return;
4687         /* OK, we have the power capabilities, so
4688            now get the status register */
4689         cap += PCIR_POWER_STATUS;
4690         status = pci_read_config(dev, cap, 2);
4691         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4692         pci_write_config(dev, cap, status, 2);
4693         return;
4694 }
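/*
 * Editorial sketch: the helper above only inspects the first entry of
 * the capability list.  A full walk would follow the next pointers, as
 * sketched below; PCICAP_ID/PCICAP_NEXTPTR are the standard pcireg.h
 * offsets, assumed available here.
 */
#if 0
static u8
example_find_pmg_cap(device_t dev)
{
	u8 ptr = pci_read_config(dev, PCIR_CAP_PTR, 1);

	while (ptr != 0) {
		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == PCIY_PMG)
			return (ptr);	/* offset of the PM capability */
		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
	}
	return (0);			/* not found */
}
#endif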
4695
4696
4697 /**********************************************************************
4698  *
4699  *  Update the board statistics counters.
4700  *
4701  **********************************************************************/
4702 static void
4703 igb_update_stats_counters(struct adapter *adapter)
4704 {
4705         struct ifnet   *ifp;
4706
4707         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4708            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4709                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4710                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4711         }
4712         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4713         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4714         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4715         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4716
4717         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4718         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4719         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4720         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4721         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4722         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4723         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4724         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4725         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4726         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4727         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4728         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4729         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4730         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4731         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4732         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4733         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4734         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4735         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4736         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4737
4738         /* For the 64-bit byte counters the low dword must be read first; */
4739         /* both registers clear on the read of the high dword. */
4740         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL);
4741         adapter->stats.gorc += (u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32;
4742         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
4743         adapter->stats.gotc += (u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32;
4744         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4745         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4746         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4747         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4748         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4749
4750         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4751         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) + ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4752
4753         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4754         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4755         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4756         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4757         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4758         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4759         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4760         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4761         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4762         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4763
4764         adapter->stats.algnerrc += 
4765                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4766         adapter->stats.rxerrc += 
4767                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4768         adapter->stats.tncrs += 
4769                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4770         adapter->stats.cexterr += 
4771                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4772         adapter->stats.tsctc += 
4773                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4774         adapter->stats.tsctfc += 
4775                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4776         ifp = adapter->ifp;
4777
4778         ifp->if_collisions = adapter->stats.colc;
4779
4780         /* Rx Errors */
4781         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4782             adapter->stats.crcerrs + adapter->stats.algnerrc +
4783             adapter->stats.ruc + adapter->stats.roc +
4784             adapter->stats.mpc + adapter->stats.cexterr;
4785
4786         /* Tx Errors */
4787         ifp->if_oerrors = adapter->stats.ecol +
4788             adapter->stats.latecol + adapter->watchdog_events;
4789 }
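/*
 * Editorial sketch: the 64-bit byte counters (GORC/GOTC, TOR/TOT)
 * latch on the low-dword read and clear on the high-dword read, so a
 * generic reader must access them in that order.  The helper name is
 * an illustrative assumption.
 */
#if 0
static u64
example_read_stat64(struct e1000_hw *hw, u32 lo_reg, u32 hi_reg)
{
	u64 val;

	val = E1000_READ_REG(hw, lo_reg);		/* latches the pair */
	val |= (u64)E1000_READ_REG(hw, hi_reg) << 32;	/* clears the pair */
	return (val);
}
#endif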
4790
4791
4792 /**********************************************************************
4793  *
4794  *  This routine is called only when igb_display_debug_stats is enabled.
4795  *  This routine provides a way to take a look at important statistics
4796  *  maintained by the driver and hardware.
4797  *
4798  **********************************************************************/
4799 static void
4800 igb_print_debug_info(struct adapter *adapter)
4801 {
4802         device_t dev = adapter->dev;
4803         struct igb_queue *que = adapter->queues;
4804         struct rx_ring *rxr = adapter->rx_rings;
4805         struct tx_ring *txr = adapter->tx_rings;
4806         uint8_t *hw_addr = adapter->hw.hw_addr;
4807
4808         device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4809         device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4810             E1000_READ_REG(&adapter->hw, E1000_CTRL),
4811             E1000_READ_REG(&adapter->hw, E1000_RCTL));
4812
4813 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4814         device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4815             E1000_READ_REG(&adapter->hw, E1000_IMS),
4816             E1000_READ_REG(&adapter->hw, E1000_EIMS));
4817 #endif
4818
4819         device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4820             ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4821             (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4822         device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4823             adapter->hw.fc.high_water,
4824             adapter->hw.fc.low_water);
4825
4826         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4827                 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
4828                     E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4829                     E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4830                 device_printf(dev, "rdh = %d, rdt = %d\n",
4831                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4832                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4833                 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4834                     txr->me, (long long)txr->no_desc_avail);
4835                 device_printf(dev, "TX(%d) Packets sent = %lld\n",
4836                     txr->me, (long long)txr->tx_packets);
4837                 device_printf(dev, "RX(%d) Packets received = %lld  ",
4838                     rxr->me, (long long)rxr->rx_packets);
4839         }
4840         rxr = adapter->rx_rings;        /* reset; the loop above advanced it */
4841         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4842 #ifdef NET_LRO
4843                 struct lro_ctrl *lro = &rxr->lro;
4844 #endif
4845                 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4846                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4847                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4848                 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4849                     (long long)rxr->rx_packets);
4850                 device_printf(dev, " Split Packets = %lld ",
4851                     (long long)rxr->rx_split_packets);
4852                 device_printf(dev, " Byte count = %lld\n",
4853                     (long long)rxr->rx_bytes);
4854 #ifdef NET_LRO
4855                 device_printf(dev, "RX(%d) LRO Queued = %d  ",
4856                     i, lro->lro_queued);
4857                 device_printf(dev, "LRO Flushed = %d\n", lro->lro_flushed);
4858 #endif
4859         }
4860
4861         for (int i = 0; i < adapter->num_queues; i++, que++)
4862                 device_printf(dev, "QUE(%d) IRQs = %llx\n",
4863                     i, (unsigned long long)que->irqs);
4864
4865         device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4866         device_printf(dev, "Mbuf defrag failed = %ld\n",
4867             adapter->mbuf_defrag_failed);
4868         device_printf(dev, "Std mbuf header failed = %ld\n",
4869             adapter->mbuf_header_failed);
4870         device_printf(dev, "Std mbuf packet failed = %ld\n",
4871             adapter->mbuf_packet_failed);
4872         device_printf(dev, "Driver dropped packets = %ld\n",
4873             adapter->dropped_pkts);
4874         device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4875                 adapter->no_tx_dma_setup);
4876 }
4877
4878 static void
4879 igb_print_hw_stats(struct adapter *adapter)
4880 {
4881         device_t dev = adapter->dev;
4882
4883         device_printf(dev, "Excessive collisions = %lld\n",
4884             (long long)adapter->stats.ecol);
4885 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4886         device_printf(dev, "Symbol errors = %lld\n",
4887             (long long)adapter->stats.symerrs);
4888 #endif
4889         device_printf(dev, "Sequence errors = %lld\n",
4890             (long long)adapter->stats.sec);
4891         device_printf(dev, "Defer count = %lld\n",
4892             (long long)adapter->stats.dc);
4893         device_printf(dev, "Missed Packets = %lld\n",
4894             (long long)adapter->stats.mpc);
4895         device_printf(dev, "Receive No Buffers = %lld\n",
4896             (long long)adapter->stats.rnbc);
4897         /* RLEC is inaccurate on some hardware, so calculate our own. */
4898         device_printf(dev, "Receive Length Errors = %lld\n",
4899             ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4900         device_printf(dev, "Receive errors = %lld\n",
4901             (long long)adapter->stats.rxerrc);
4902         device_printf(dev, "Crc errors = %lld\n",
4903             (long long)adapter->stats.crcerrs);
4904         device_printf(dev, "Alignment errors = %lld\n",
4905             (long long)adapter->stats.algnerrc);
4906         /* On 82575 these are collision counts */
4907         device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4908             (long long)adapter->stats.cexterr);
4909         device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4910         device_printf(dev, "watchdog timeouts = %ld\n",
4911             adapter->watchdog_events);
4912         device_printf(dev, "XON Rcvd = %lld\n",
4913             (long long)adapter->stats.xonrxc);
4914         device_printf(dev, "XON Xmtd = %lld\n",
4915             (long long)adapter->stats.xontxc);
4916         device_printf(dev, "XOFF Rcvd = %lld\n",
4917             (long long)adapter->stats.xoffrxc);
4918         device_printf(dev, "XOFF Xmtd = %lld\n",
4919             (long long)adapter->stats.xofftxc);
4920         device_printf(dev, "Good Packets Rcvd = %lld\n",
4921             (long long)adapter->stats.gprc);
4922         device_printf(dev, "Good Packets Xmtd = %lld\n",
4923             (long long)adapter->stats.gptc);
4924         device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4925             (long long)adapter->stats.tsctc);
4926         device_printf(dev, "TSO Contexts Failed = %lld\n",
4927             (long long)adapter->stats.tsctfc);
4928 }
4929
4930 /**********************************************************************
4931  *
4932  *  This routine provides a way to dump out the adapter eeprom,
4933  *  often a useful debug/service tool. This only dumps the first
4934  *  32 words; everything that matters lives within that range.
4935  *
4936  **********************************************************************/
4937 static void
4938 igb_print_nvm_info(struct adapter *adapter)
4939 {
4940         u16     eeprom_data;
4941         int     i, j, row = 0;
4942
4943         /* It's a bit crude, but it gets the job done */
4944         kprintf("\nInterface EEPROM Dump:\n");
4945         kprintf("Offset\n0x0000  ");
4946         for (i = 0, j = 0; i < 32; i++, j++) {
4947                 if (j == 8) { /* Make the offset block */
4948                         j = 0; ++row;
4949                         kprintf("\n0x00%x0  ", row);
4950                 }
4951                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4952                 kprintf("%04x ", eeprom_data);
4953         }
4954         kprintf("\n");
4955 }
4956
4957 static int
4958 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4959 {
4960         struct adapter *adapter;
4961         int error;
4962         int result;
4963
4964         result = -1;
4965         error = sysctl_handle_int(oidp, &result, 0, req);
4966
4967         if (error || !req->newptr)
4968                 return (error);
4969
4970         if (result == 1) {
4971                 adapter = (struct adapter *)arg1;
4972                 igb_print_debug_info(adapter);
4973         }
4974         /*
4975          * This value will cause a hex dump of the
4976          * first 32 16-bit words of the EEPROM to
4977          * the screen.
4978          */
4979         if (result == 2) {
4980                 adapter = (struct adapter *)arg1;
4981                 igb_print_nvm_info(adapter);
4982         }
4983
4984         return (error);
4985 }
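/*
 * Editorial sketch: this handler is wired to a per-adapter sysctl
 * node at attach time; the node name below is an illustrative
 * assumption, not taken from this file.
 *
 *	# sysctl dev.igb.0.debug_info=1		(dump debug state)
 *	# sysctl dev.igb.0.debug_info=2		(dump first 32 EEPROM words)
 */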
4986
4987
4988 static int
4989 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4990 {
4991         struct adapter *adapter;
4992         int error;
4993         int result;
4994
4995         result = -1;
4996         error = sysctl_handle_int(oidp, &result, 0, req);
4997
4998         if (error || !req->newptr)
4999                 return (error);
5000
5001         if (result == 1) {
5002                 adapter = (struct adapter *)arg1;
5003                 igb_print_hw_stats(adapter);
5004         }
5005
5006         return (error);
5007 }
5008
5009 static void
5010 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5011         const char *description, int *limit, int value)
5012 {
5013         *limit = value;
5014         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
5015             SYSCTL_CHILDREN(adapter->sysctl_tree),
5016             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5017 }
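/*
 * Editorial sketch of how the helper above is typically invoked from
 * attach; the sysctl name, description and default shown are
 * illustrative assumptions, not taken from this file.
 */
#if 0
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, 100);
#endif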