/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/


#include "opt_polling.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>

#ifdef IGB_IEEE1588
#include <sys/ieee1588.h>
#endif

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#ifdef NET_LRO
#include <netinet/tcp_lro.h>
#endif
#include <netinet/udp.h>

#include <sys/in_cksum.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"
#include "ifcap_defines.h" // XXX

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.9.1";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *,
		    union e1000_adv_rx_desc *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct rx_ring *, int);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
#ifdef NET_TSO
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
#endif
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static int	igb_get_buf(struct rx_ring *, int, u8);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);

static void	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** AIM: Adaptive Interrupt Moderation
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);

/*
 * MSI-X should give the best performance, but it is left
 * disabled by default here; set this tunable to force it
 * on (or keep it off for testing).
 */
static int igb_enable_msix = 0;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);

/*
 * Header split has seemed to be beneficial in
 * many circumstances tested, however there have
 * been some stability issues, so the default is
 * off.
 */
static bool igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
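
/*
 * Example (illustrative only): the tunables above are read from the
 * kernel environment when the module loads, so they can be set from
 * /boot/loader.conf, e.g.:
 *
 *	hw.igb.rxd=2048
 *	hw.igb.txd=2048
 *	hw.igb.enable_msix=1
 *	hw.igb.fc_setting=0
 *
 * Descriptor counts must pass the validation done in igb_attach():
 * a multiple of IGB_DBA_ALIGN and within the IGB_MIN/IGB_MAX bounds,
 * otherwise the driver falls back to the defaults (with a console
 * message).
 */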

/*
** Shadow VFTA table: this is needed because
** the real filter table gets cleared during
** a soft reset and the driver needs to be able
** to repopulate it.
*/
static u32 igb_shadow_vfta[IGB_VFTA_SIZE];


/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			ksprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	sysctl_ctx_init(&adapter->sysctl_ctx);
	adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
					SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
					device_get_nameunit(adapter->dev),
					CTLFLAG_RD, 0, "");
	if (adapter->sysctl_tree == NULL) {
		device_printf(adapter->dev, "can't add sysctl node\n");
		error = ENOMEM;
		goto err_sysctl;
	}

	SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	SYSCTL_ADD_INT(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	SYSCTL_ADD_INT(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	callout_init(&adapter->timer);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
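	/*
	 * With the standard 1500-byte MTU these work out to
	 * 1500 + 14 + 4 = 1518 bytes for the maximum and
	 * 60 + 4 = 64 bytes for the minimum frame on the wire.
	 */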

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state: this is
	** important for reading the nvm and
	** mac address from the device.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
err_sysctl:
	sysctl_ctx_free(&adapter->sysctl_ctx);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunks != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);

	//callout_drain(&adapter->timer);
	callout_stop(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	sysctl_ctx_free(&adapter->sysctl_ctx);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_flags & IFF_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		return;

	/*
	 * Must purge on abort from this point on or the netif will call
	 * us endlessly.  Either that or set IFF_OACTIVE.
	 */
	if (!adapter->link_active) {
		ifq_purge(&ifp->if_snd);
		return;
	}

	while (!ifq_is_empty(&ifp->if_snd)) {

		m_head = ifq_dequeue(&ifp->if_snd, NULL);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_flags |= IFF_OACTIVE;
			ifq_prepend(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_check = TRUE;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_flags & IFF_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	int		i = 0, err = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	txr = &adapter->tx_rings[i];

	if (IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr, m);
		IGB_TX_UNLOCK(txr);
	} else
		err = drbr_enqueue(ifp, txr->br, m);

	return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) !=
	    IFF_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;
	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			break;
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			ifp->if_flags |= IFF_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
	}
	return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cred)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_flags & IFF_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
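		/*
		 * 9234 bytes is the jumbo frame ceiling this driver
		 * uses for these MACs; requested MTUs that would push
		 * the frame (MTU + header + CRC) past it are rejected
		 * below with EINVAL.
		 */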
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_flags & IFF_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_flags & IFF_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if ((ifp->if_flags & IFF_POLLING) == 0)
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
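		/* FALLTHROUGH -- SIOCSIFMEDIA shares the media handler below */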
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (ifp->if_flags & IFF_POLLING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
#ifdef NET_TSO
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
#endif
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
#ifdef NET_LRO
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
#endif
		if (reinit && (ifp->if_flags & IFF_RUNNING))
			igb_init(adapter);
#if 0
		VLAN_CAPABILITIES(ifp);
#endif
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}
	IOCTL_DEBUGOUT("ioctl done");

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	      ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

#ifdef NET_TSO
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;
#endif

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (ifp->if_mtu > ETHERMTU)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MCLBYTES;

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* Set up VLAN tag offload and filter */
	igb_setup_vlan_hw_support(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_flags & IFF_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
	INIT_DEBUGOUT("igb_init: end");
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	if (ifp->if_flags & IFF_RUNNING) {
		if (igb_rxeof(rxr, adapter->rx_process_limit))
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!ifq_is_empty(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}

static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	struct ifnet	*ifp = adapter->ifp;
	u32		loop = IGB_MAX_LOOP;
	bool		more;

	/* RX first */
	do {
		more = igb_rxeof(rxr, -1);
	} while (loop-- && more);

	if (IGB_TX_TRYLOCK(txr)) {
		loop = IGB_MAX_LOOP;
		do {
			more = igb_txeof(txr);
		} while (loop-- && more);
#if __FreeBSD_version >= 800000
		igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!ifq_is_empty(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	/* Reenable this interrupt */
#ifdef DEVICE_POLLING
	if ((ifp->if_flags & IFF_POLLING) == 0)
#endif
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
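/*
 * Empty shims: they let the handler below keep FreeBSD's
 * filter-style "return FILTER_*" statements, which reduce to
 * bare returns in this void-returning DragonFly handler.
 */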
#define FILTER_STRAY
#define FILTER_HANDLED
static void
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	uint32_t	reg_icr;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(adapter->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
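/*
 * POLL_RETURN_COUNT lets one igb_poll() body serve both poll-handler
 * ABIs: it expands to the rx count where the handler returns int,
 * and to nothing where the handler is void.
 */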
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct tx_ring	*txr = adapter->tx_rings;
	u32		reg_icr, rx_done = 0;
	u32		loop = IGB_MAX_LOOP;
	bool		more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_flags & IFF_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			taskqueue_enqueue(adapter->tq, &adapter->link_task);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	/* TODO: rx_count */
	rx_done = igb_rxeof(rxr, count) ? 1 : 0;

	IGB_TX_LOCK(txr);
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	if (!ifq_is_empty(&ifp->if_snd))
		igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX Queue Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(rxr, adapter->rx_process_limit);

	if (igb_enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default if sub-gig */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= 0x8000000;
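	/*
	 * Worked example (illustrative): a gigabit link averaging
	 * 1472 bytes/packet gives newitr = 1472 + 24 = 1496; that is
	 * above the 300-1200 mid range, so it is halved to 748, which
	 * the 0x7FFC mask leaves unchanged, and on non-82575 MACs the
	 * 0x8000000 bit is then OR'd in before the value is saved
	 * for the next interrupt.
	 */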

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_tx || more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	taskqueue_enqueue(adapter->tq, &adapter->link_task);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}
1544
1545 /*********************************************************************
1546  *
1547  *  Media Ioctl callback
1548  *
1549  *  This routine is called when the user changes speed/duplex using
1550  *  media/mediaopt options of ifconfig.
1551  *
1552  **********************************************************************/
1553 static int
1554 igb_media_change(struct ifnet *ifp)
1555 {
1556         struct adapter *adapter = ifp->if_softc;
1557         struct ifmedia  *ifm = &adapter->media;
1558
1559         INIT_DEBUGOUT("igb_media_change: begin");
1560
1561         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1562                 return (EINVAL);
1563
1564         IGB_CORE_LOCK(adapter);
1565         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1566         case IFM_AUTO:
1567                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1568                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1569                 break;
1570         case IFM_1000_LX:
1571         case IFM_1000_SX:
1572         case IFM_1000_T:
1573                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1574                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1575                 break;
1576         case IFM_100_TX:
1577                 adapter->hw.mac.autoneg = FALSE;
1578                 adapter->hw.phy.autoneg_advertised = 0;
1579                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1580                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1581                 else
1582                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1583                 break;
1584         case IFM_10_T:
1585                 adapter->hw.mac.autoneg = FALSE;
1586                 adapter->hw.phy.autoneg_advertised = 0;
1587                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1588                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1589                 else
1590                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1591                 break;
1592         default:
1593                 device_printf(adapter->dev, "Unsupported media type\n");
1594         }
1595
1596         /* As the speed/duplex settings may have changed we need to
1597          * reset the PHY.
1598          */
1599         adapter->hw.phy.reset_disable = FALSE;
1600
1601         igb_init_locked(adapter);
1602         IGB_CORE_UNLOCK(adapter);
1603
1604         return (0);
1605 }
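
/*
 * As a usage illustration, forcing 100 Mbps full duplex from userland:
 *
 *      ifconfig igb0 media 100baseTX mediaopt full-duplex
 *
 * lands in the IFM_100_TX case above with IFM_FDX set in the media
 * word, disabling autonegotiation and forcing the speed/duplex pair.
 */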
1606
1607
1608 /*********************************************************************
1609  *
1610  *  This routine maps the mbufs to Advanced TX descriptors.
1611  *  used by the 82575 adapter.
1612  *  
1613  **********************************************************************/
1614
1615 static int
1616 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1617 {
1618         struct adapter          *adapter = txr->adapter;
1619         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1620         bus_dmamap_t            map;
1621         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1622         union e1000_adv_tx_desc *txd = NULL;
1623         struct mbuf             *m_head;
1624         u32                     olinfo_status = 0, cmd_type_len = 0;
1625         int                     nsegs, i, j, error, first, last = 0;
1626         u32                     hdrlen = 0;
1627
1628         m_head = *m_headp;
1629
1630
1631         /* Set basic descriptor constants */
1632         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1633         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1634         if (m_head->m_flags & M_VLANTAG)
1635                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1636
1637         /*
1638          * Force a cleanup if number of TX descriptors
1639          * available hits the threshold
1640          */
1641         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1642                 igb_txeof(txr);
1643                 /* Do we at least have the minimum needed? */
1644                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1645                         txr->no_desc_avail++;
1646                         return (ENOBUFS);
1647                 }
1648         }
1649
1650         /*
1651          * Map the packet for DMA.
1652          *
1653          * Capture the first descriptor index,
1654          * this descriptor will have the index
1655          * of the EOP which is the only one that
1656          * now gets a DONE bit writeback.
1657          */
1658         first = txr->next_avail_desc;
1659         tx_buffer = &txr->tx_buffers[first];
1660         tx_buffer_mapped = tx_buffer;
1661         map = tx_buffer->map;
1662
1663         error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1664             *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1665
1666         if (error == EFBIG) {
1667                 struct mbuf *m;
1668
1669                 m = m_defrag(*m_headp, MB_DONTWAIT);
1670                 if (m == NULL) {
1671                         adapter->mbuf_defrag_failed++;
1672                         m_freem(*m_headp);
1673                         *m_headp = NULL;
1674                         return (ENOBUFS);
1675                 }
1676                 *m_headp = m;
1677
1678                 /* Try it again */
1679                 error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1680                     *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1681
1682                 if (error == ENOMEM) {
1683                         adapter->no_tx_dma_setup++;
1684                         return (error);
1685                 } else if (error != 0) {
1686                         adapter->no_tx_dma_setup++;
1687                         m_freem(*m_headp);
1688                         *m_headp = NULL;
1689                         return (error);
1690                 }
1691         } else if (error == ENOMEM) {
1692                 adapter->no_tx_dma_setup++;
1693                 return (error);
1694         } else if (error != 0) {
1695                 adapter->no_tx_dma_setup++;
1696                 m_freem(*m_headp);
1697                 *m_headp = NULL;
1698                 return (error);
1699         }
1700
1701         /* Check again to be sure we have enough descriptors */
1702         if (nsegs > (txr->tx_avail - 2)) {
1703                 txr->no_desc_avail++;
1704                 bus_dmamap_unload(txr->txtag, map);
1705                 return (ENOBUFS);
1706         }
1707         m_head = *m_headp;
1708
1709         /*
1710          * Set up the context descriptor:
1711          * used when any hardware offload is done.
1712          * This includes CSUM, VLAN, and TSO. It
1713          * will use the first descriptor.
1714          */
1715 #ifdef NET_TSO
1716         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1717                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1718                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1719                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1720                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1721                 } else
1722                         return (ENXIO); 
1723         } else
1724 #endif
1725                if (igb_tx_ctx_setup(txr, m_head))
1726                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1727
1728         /* Calculate payload length */
1729         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1730             << E1000_ADVTXD_PAYLEN_SHIFT);
1731
1732         /* 82575 needs the queue index added */
1733         if (adapter->hw.mac.type == e1000_82575)
1734                 olinfo_status |= txr->me << 4;
1735
1736         /* Set up our transmit descriptors */
1737         i = txr->next_avail_desc;
1738         for (j = 0; j < nsegs; j++) {
1739                 bus_size_t seg_len;
1740                 bus_addr_t seg_addr;
1741
1742                 tx_buffer = &txr->tx_buffers[i];
1743                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1744                 seg_addr = segs[j].ds_addr;
1745                 seg_len  = segs[j].ds_len;
1746
1747                 txd->read.buffer_addr = htole64(seg_addr);
1748                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1749                 txd->read.olinfo_status = htole32(olinfo_status);
1750                 last = i;
1751                 if (++i == adapter->num_tx_desc)
1752                         i = 0;
1753                 tx_buffer->m_head = NULL;
1754                 tx_buffer->next_eop = -1;
1755         }
1756
1757         txr->next_avail_desc = i;
1758         txr->tx_avail -= nsegs;
1759
1760         tx_buffer->m_head = m_head;
1761         tx_buffer_mapped->map = tx_buffer->map;
1762         tx_buffer->map = map;
1763         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1764
1765         /*
1766          * Last Descriptor of Packet
1767          * needs End Of Packet (EOP)
1768          * and Report Status (RS)
1769          */
1770         txd->read.cmd_type_len |=
1771             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1772         /*
1773          * Keep track in the first buffer which
1774          * descriptor will be written back
1775          */
1776         tx_buffer = &txr->tx_buffers[first];
1777         tx_buffer->next_eop = last;
1778         txr->watchdog_time = ticks;
1779
1780         /*
1781          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1782          * that this frame is available to transmit.
1783          */
1784         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1785             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1786         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1787         ++txr->tx_packets;
1788
1789         return (0);
1790
1791 }
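
/*
 * Descriptor accounting in igb_xmit(), as a small example with assumed
 * values.  With num_tx_desc = 1024, next_avail_desc = 1022 and a
 * 3-segment mbuf chain:
 *
 *      first = 1022;                   // saved for writeback tracking
 *      // segments fill slots 1022, 1023 and 0 (the ring wraps)
 *      last = 0;                       // the EOP descriptor
 *      tx_buffers[1022].next_eop = 0;  // igb_txeof() watches this slot
 *
 * Only the EOP descriptor carries the RS bit, so a single DONE
 * writeback on slot 0 lets igb_txeof() reclaim all three slots.
 */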
1792
1793 static void
1794 igb_set_promisc(struct adapter *adapter)
1795 {
1796         struct ifnet    *ifp = adapter->ifp;
1797         uint32_t        reg_rctl;
1798
1799         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1800
1801         if (ifp->if_flags & IFF_PROMISC) {
1802                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1803                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1804         } else if (ifp->if_flags & IFF_ALLMULTI) {
1805                 reg_rctl |= E1000_RCTL_MPE;
1806                 reg_rctl &= ~E1000_RCTL_UPE;
1807                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1808         }
1809 }
1810
1811 static void
1812 igb_disable_promisc(struct adapter *adapter)
1813 {
1814         uint32_t        reg_rctl;
1815
1816         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1817
1818         reg_rctl &=  (~E1000_RCTL_UPE);
1819         reg_rctl &=  (~E1000_RCTL_MPE);
1820         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1821 }
1822
1823
1824 /*********************************************************************
1825  *  Multicast Update
1826  *
1827  *  This routine is called whenever multicast address list is updated.
1828  *
1829  **********************************************************************/
1830
1831 static void
1832 igb_set_multi(struct adapter *adapter)
1833 {
1834         struct ifnet    *ifp = adapter->ifp;
1835         struct ifmultiaddr *ifma;
1836         u32 reg_rctl = 0;
1837         static u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1838
1839         int mcnt = 0;
1840
1841         IOCTL_DEBUGOUT("igb_set_multi: begin");
1842
1843 #if 0
1844 #if __FreeBSD_version < 800000
1845         IF_ADDR_LOCK(ifp);
1846 #else
1847         if_maddr_rlock(ifp);
1848 #endif
1849 #endif
1850
1851         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1852                 if (ifma->ifma_addr->sa_family != AF_LINK)
1853                         continue;
1854
1855                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1856                         break;
1857
1858                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1859                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1860                 mcnt++;
1861         }
1862 #if 0
1863 #if __FreeBSD_version < 800000
1864         IF_ADDR_UNLOCK(ifp);
1865 #else
1866         if_maddr_runlock(ifp);
1867 #endif
1868 #endif
1869
1870         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1871                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1872                 reg_rctl |= E1000_RCTL_MPE;
1873                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1874         } else {
1875                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1876         }
1877 }
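
/*
 * The mta[] table above is a flat byte array: entry i occupies bytes
 * [i * ETH_ADDR_LEN, (i + 1) * ETH_ADDR_LEN).  Reading one entry back
 * would look like (illustrative only):
 *
 *      u8 *addr = &mta[i * ETH_ADDR_LEN];      // 6-byte MAC of entry i
 *
 * e1000_update_mc_addr_list() walks the same layout when it programs
 * the hardware multicast filter.
 */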
1878
1879
1880 /*********************************************************************
1881  *  Timer routine:
1882  *      This routine checks for link status,
1883  *      updates statistics, and does the watchdog.
1884  *
1885  **********************************************************************/
1886
1887 static void
1888 igb_local_timer(void *arg)
1889 {
1890         struct adapter          *adapter = arg;
1891         struct ifnet            *ifp = adapter->ifp;
1892         device_t                dev = adapter->dev;
1893         struct tx_ring          *txr = adapter->tx_rings;
1894
1895         IGB_CORE_LOCK(adapter);
1896         IGB_CORE_LOCK_ASSERT(adapter);
1897
1898
1899
1900
1901         igb_update_link_status(adapter);
1902         igb_update_stats_counters(adapter);
1903
1904         if (igb_display_debug_stats && (ifp->if_flags & IFF_RUNNING))
1905                 igb_print_hw_stats(adapter);
1906
1907         /*
1908         ** Watchdog: check for time since any descriptor was cleaned
1909         */
1910         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1911                 if (txr->watchdog_check == FALSE)
1912                         continue;
1913                 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1914                         goto timeout;
1915         }
1916
1917         /* Trigger an RX interrupt on all queues */
1918 #ifdef DEVICE_POLLING
1919         if ((ifp->if_flags & IFF_POLLING) == 0)
1920 #endif
1921                 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1922         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1923         IGB_CORE_UNLOCK(adapter);
1924         return;
1925
1926 timeout:
1927         device_printf(dev, "Watchdog timeout -- resetting\n");
1928         device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1929             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1930             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1931         device_printf(dev, "TX(%d) desc avail = %d, "
1932             "Next TX to Clean = %d\n",
1933             txr->me, txr->tx_avail, txr->next_to_clean);
1934         adapter->ifp->if_flags &= ~IFF_RUNNING;
1935         adapter->watchdog_events++;
1936         igb_init_locked(adapter);
1937         IGB_CORE_UNLOCK(adapter);
1938 }
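
/*
 * Watchdog arithmetic, as a worked example with assumed values
 * (IGB_WATCHDOG is defined in the driver header, on the order of ten
 * seconds worth of ticks).  With hz = 100 and IGB_WATCHDOG = 10 * hz:
 *
 *      txr->watchdog_time = 5000;      // tick of the last TX clean
 *      ticks = 6100;                   // 11 seconds later
 *      (6100 - 5000) > 1000            // true: take the timeout path
 *
 * Since the timer rearms itself every hz ticks, a hung queue is caught
 * within roughly one second of crossing the threshold.
 */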
1939
1940 static void
1941 igb_update_link_status(struct adapter *adapter)
1942 {
1943         struct e1000_hw *hw = &adapter->hw;
1944         struct ifnet *ifp = adapter->ifp;
1945         device_t dev = adapter->dev;
1946         struct tx_ring *txr = adapter->tx_rings;
1947         u32 link_check = 0;
1948
1949         /* Get the cached link value or read for real */
1950         switch (hw->phy.media_type) {
1951         case e1000_media_type_copper:
1952                 if (hw->mac.get_link_status) {
1953                         /* Do the work to read phy */
1954                         e1000_check_for_link(hw);
1955                         link_check = !hw->mac.get_link_status;
1956                 } else
1957                         link_check = TRUE;
1958                 break;
1959         case e1000_media_type_fiber:
1960                 e1000_check_for_link(hw);
1961                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1962                                  E1000_STATUS_LU);
1963                 break;
1964         case e1000_media_type_internal_serdes:
1965                 e1000_check_for_link(hw);
1966                 link_check = adapter->hw.mac.serdes_has_link;
1967                 break;
1968         default:
1969         case e1000_media_type_unknown:
1970                 break;
1971         }
1972
1973         /* Now we check if a transition has happened */
1974         if (link_check && (adapter->link_active == 0)) {
1975                 e1000_get_speed_and_duplex(&adapter->hw, 
1976                     &adapter->link_speed, &adapter->link_duplex);
1977                 if (bootverbose)
1978                         device_printf(dev, "Link is up %d Mbps %s\n",
1979                             adapter->link_speed,
1980                             ((adapter->link_duplex == FULL_DUPLEX) ?
1981                             "Full Duplex" : "Half Duplex"));
1982                 adapter->link_active = 1;
1983                 ifp->if_baudrate = adapter->link_speed * 1000000;
1984                 ifp->if_link_state = LINK_STATE_UP;
1985                 if_link_state_change(ifp);
1986         } else if (!link_check && (adapter->link_active == 1)) {
1987                 ifp->if_baudrate = adapter->link_speed = 0;
1988                 adapter->link_duplex = 0;
1989                 if (bootverbose)
1990                         device_printf(dev, "Link is Down\n");
1991                 adapter->link_active = 0;
1992                 ifp->if_link_state = LINK_STATE_DOWN;
1993                 if_link_state_change(ifp);
1994                 /* Turn off watchdogs */
1995                 for (int i = 0; i < adapter->num_queues; i++, txr++)
1996                         txr->watchdog_check = FALSE;
1997         }
1998 }
1999
2000 /*********************************************************************
2001  *
2002  *  This routine disables all traffic on the adapter by issuing a
2003  *  global reset on the MAC and deallocates TX/RX buffers.
2004  *
2005  **********************************************************************/
2006
2007 static void
2008 igb_stop(void *arg)
2009 {
2010         struct adapter  *adapter = arg;
2011         struct ifnet    *ifp = adapter->ifp;
2012         struct tx_ring *txr = adapter->tx_rings;
2013
2014         IGB_CORE_LOCK_ASSERT(adapter);
2015
2016         INIT_DEBUGOUT("igb_stop: begin");
2017
2018         igb_disable_intr(adapter);
2019
2020         callout_stop(&adapter->timer);
2021
2022         /* Tell the stack that the interface is no longer active */
2023         ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2024
2025         /* Unarm watchdog timer. */
2026         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2027                 IGB_TX_LOCK(txr);
2028                 txr->watchdog_check = FALSE;
2029                 IGB_TX_UNLOCK(txr);
2030         }
2031
2032         e1000_reset_hw(&adapter->hw);
2033         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2034 }
2035
2036
2037 /*********************************************************************
2038  *
2039  *  Determine hardware revision.
2040  *
2041  **********************************************************************/
2042 static void
2043 igb_identify_hardware(struct adapter *adapter)
2044 {
2045         device_t dev = adapter->dev;
2046
2047         /* Make sure our PCI config space has the necessary stuff set */
2048         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2049         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2050             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2051                 device_printf(dev, "Memory Access and/or Bus Master bits "
2052                     "were not set!\n");
2053                 adapter->hw.bus.pci_cmd_word |=
2054                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2055                 pci_write_config(dev, PCIR_COMMAND,
2056                     adapter->hw.bus.pci_cmd_word, 2);
2057         }
2058
2059         /* Save off the information about this board */
2060         adapter->hw.vendor_id = pci_get_vendor(dev);
2061         adapter->hw.device_id = pci_get_device(dev);
2062         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2063         adapter->hw.subsystem_vendor_id =
2064             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2065         adapter->hw.subsystem_device_id =
2066             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2067
2068         /* Do Shared Code Init and Setup */
2069         if (e1000_set_mac_type(&adapter->hw)) {
2070                 device_printf(dev, "Setup init failure\n");
2071                 return;
2072         }
2073 }
2074
2075 static int
2076 igb_allocate_pci_resources(struct adapter *adapter)
2077 {
2078         device_t        dev = adapter->dev;
2079         int             rid;
2080
2081         rid = PCIR_BAR(0);
2082         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2083             &rid, RF_ACTIVE);
2084         if (adapter->pci_mem == NULL) {
2085                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2086                 return (ENXIO);
2087         }
2088         adapter->osdep.mem_bus_space_tag =
2089             rman_get_bustag(adapter->pci_mem);
2090         adapter->osdep.mem_bus_space_handle =
2091             rman_get_bushandle(adapter->pci_mem);
2092         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2093
2094         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2095
2096         /* This will setup either MSI/X or MSI */
2097         adapter->msix = igb_setup_msix(adapter);
2098         adapter->hw.back = &adapter->osdep;
2099
2100         return (0);
2101 }
2102
2103 /*********************************************************************
2104  *
2105  *  Setup the Legacy or MSI Interrupt handler
2106  *
2107  **********************************************************************/
2108 static int
2109 igb_allocate_legacy(struct adapter *adapter)
2110 {
2111         device_t dev = adapter->dev;
2112         int error, rid = 0;
2113
2114         /* Turn off all interrupts */
2115         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2116
2117 #if 0
2118         /* MSI RID is 1 */
2119         if (adapter->msix == 1)
2120                 rid = 1;
2121 #endif
2122         rid = 0;
2123         /* We allocate a single interrupt resource */
2124         adapter->res = bus_alloc_resource_any(dev,
2125             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2126         if (adapter->res == NULL) {
2127                 device_printf(dev, "Unable to allocate bus resource: "
2128                     "interrupt\n");
2129                 return (ENXIO);
2130         }
2131
2132         /*
2133          * Try allocating a fast interrupt and the associated deferred
2134          * processing contexts.
2135          */
2136         TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2137         /* Make tasklet for deferred link handling */
2138         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2139         adapter->tq = taskqueue_create("igb_taskq", M_INTWAIT,
2140             taskqueue_thread_enqueue, &adapter->tq);
2141         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s taskq",
2142             device_get_nameunit(adapter->dev));
2143         if ((error = bus_setup_intr(dev, adapter->res,
2144             /*INTR_TYPE_NET |*/ INTR_MPSAFE, igb_irq_fast,
2145             adapter, &adapter->tag, NULL)) != 0) {
2146                 device_printf(dev, "Failed to register fast interrupt "
2147                             "handler: %d\n", error);
2148                 taskqueue_free(adapter->tq);
2149                 adapter->tq = NULL;
2150                 return (error);
2151         }
2152
2153         return (0);
2154 }
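
/*
 * The registration above follows the usual fast-interrupt split: the
 * handler itself does minimal work and defers the rest to a taskqueue
 * thread.  A minimal sketch of that shape (assumed; the real
 * igb_irq_fast is defined elsewhere in this file):
 *
 *      static void
 *      example_irq_fast(void *arg)
 *      {
 *              struct adapter *adapter = arg;
 *
 *              igb_disable_intr(adapter);      // mask further interrupts
 *              taskqueue_enqueue(adapter->tq,  // defer the heavy work
 *                  &adapter->rxtx_task);
 *      }
 *
 * The taskqueue thread then runs igb_handle_rxtx() and re-enables the
 * interrupt once the rings have been serviced.
 */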
2155
2156
2157 /*********************************************************************
2158  *
2159  *  Setup the MSIX Queue Interrupt handlers: 
2160  *
2161  **********************************************************************/
2162 static int
2163 igb_allocate_msix(struct adapter *adapter)
2164 {
2165         device_t                dev = adapter->dev;
2166         struct igb_queue        *que = adapter->queues;
2167         int                     error, rid, vector = 0;
2168
2169
2170         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2171                 rid = vector + 1;
2172                 que->res = bus_alloc_resource_any(dev,
2173                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2174                 if (que->res == NULL) {
2175                         device_printf(dev,
2176                             "Unable to allocate bus resource: "
2177                             "MSIX Queue Interrupt\n");
2178                         return (ENXIO);
2179                 }
2180                 error = bus_setup_intr(dev, que->res,
2181                     /*INTR_TYPE_NET |*/ INTR_MPSAFE, 
2182                     igb_msix_que, que, &que->tag, NULL);
2183                 if (error) {
2184                         que->res = NULL;
2185                         device_printf(dev, "Failed to register Queue handler");
2186                         return (error);
2187                 }
2188                 que->msix = vector;
2189                 if (adapter->hw.mac.type == e1000_82575)
2190                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2191                 else
2192                         que->eims = 1 << vector;
2193                 /*
2194                 ** Bind the msix vector, and thus the
2195                 ** rings to the corresponding cpu.
2196                 */
2197 #if 0
2198                 if (adapter->num_queues > 1)
2199                         bus_bind_intr(dev, que->res, i);
2200 #endif
2201                 /* Make tasklet for deferred handling */
2202                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2203                 que->tq = taskqueue_create("igb_que", M_INTWAIT,
2204                     taskqueue_thread_enqueue, &que->tq);
2205                 taskqueue_start_threads(&que->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s que",
2206                     device_get_nameunit(adapter->dev));
2207         }
2208
2209         /* And Link */
2210         rid = vector + 1;
2211         adapter->res = bus_alloc_resource_any(dev,
2212             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2213         if (adapter->res == NULL) {
2214                 device_printf(dev,
2215                     "Unable to allocate bus resource: "
2216                     "MSIX Link Interrupt\n");
2217                 return (ENXIO);
2218         }
2219         if ((error = bus_setup_intr(dev, adapter->res,
2220             /*INTR_TYPE_NET |*/ INTR_MPSAFE,
2221             igb_msix_link, adapter, &adapter->tag, NULL)) != 0) {
2222                 device_printf(dev, "Failed to register Link handler");
2223                 return (error);
2224         }
2225         adapter->linkvec = vector;
2226
2227         /* Make tasklet for deferred handling */
2228         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2229         adapter->tq = taskqueue_create("igb_link", M_INTWAIT,
2230             taskqueue_thread_enqueue, &adapter->tq);
2231         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s link",
2232             device_get_nameunit(adapter->dev));
2233
2234         return (0);
2235 }
2236
2237
2238 static void
2239 igb_configure_queues(struct adapter *adapter)
2240 {
2241         struct  e1000_hw        *hw = &adapter->hw;
2242         struct  igb_queue       *que;
2243         u32                     tmp, ivar = 0;
2244         u32                     newitr = IGB_DEFAULT_ITR;
2245
2246         /* First turn on RSS capability */
2247         if (adapter->hw.mac.type > e1000_82575)
2248                 E1000_WRITE_REG(hw, E1000_GPIE,
2249                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2250                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2251
2252         /* Turn on MSIX */
2253         switch (adapter->hw.mac.type) {
2254         case e1000_82580:
2255                 /* RX entries */
2256                 for (int i = 0; i < adapter->num_queues; i++) {
2257                         u32 index = i >> 1;
2258                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2259                         que = &adapter->queues[i];
2260                         if (i & 1) {
2261                                 ivar &= 0xFF00FFFF;
2262                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2263                         } else {
2264                                 ivar &= 0xFFFFFF00;
2265                                 ivar |= que->msix | E1000_IVAR_VALID;
2266                         }
2267                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2268                 }
2269                 /* TX entries */
2270                 for (int i = 0; i < adapter->num_queues; i++) {
2271                         u32 index = i >> 1;
2272                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2273                         que = &adapter->queues[i];
2274                         if (i & 1) {
2275                                 ivar &= 0x00FFFFFF;
2276                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2277                         } else {
2278                                 ivar &= 0xFFFF00FF;
2279                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2280                         }
2281                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2282                         adapter->eims_mask |= que->eims;
2283                 }
2284
2285                 /* And for the link interrupt */
2286                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2287                 adapter->link_mask = 1 << adapter->linkvec;
2288                 adapter->eims_mask |= adapter->link_mask;
2289                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2290                 break;
2291         case e1000_82576:
2292                 /* RX entries */
2293                 for (int i = 0; i < adapter->num_queues; i++) {
2294                         u32 index = i & 0x7; /* Each IVAR has two entries */
2295                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2296                         que = &adapter->queues[i];
2297                         if (i < 8) {
2298                                 ivar &= 0xFFFFFF00;
2299                                 ivar |= que->msix | E1000_IVAR_VALID;
2300                         } else {
2301                                 ivar &= 0xFF00FFFF;
2302                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2303                         }
2304                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2305                         adapter->eims_mask |= que->eims;
2306                 }
2307                 /* TX entries */
2308                 for (int i = 0; i < adapter->num_queues; i++) {
2309                         u32 index = i & 0x7; /* Each IVAR has two entries */
2310                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2311                         que = &adapter->queues[i];
2312                         if (i < 8) {
2313                                 ivar &= 0xFFFF00FF;
2314                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2315                         } else {
2316                                 ivar &= 0x00FFFFFF;
2317                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2318                         }
2319                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2320                         adapter->eims_mask |= que->eims;
2321                 }
2322
2323                 /* And for the link interrupt */
2324                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2325                 adapter->link_mask = 1 << adapter->linkvec;
2326                 adapter->eims_mask |= adapter->link_mask;
2327                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2328                 break;
2329
2330         case e1000_82575:
2331                 /* enable MSI-X support*/
2332                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2333                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2334                 /* Auto-Mask interrupts upon ICR read. */
2335                 tmp |= E1000_CTRL_EXT_EIAME;
2336                 tmp |= E1000_CTRL_EXT_IRCA;
2337                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2338
2339                 /* Queues */
2340                 for (int i = 0; i < adapter->num_queues; i++) {
2341                         que = &adapter->queues[i];
2342                         tmp = E1000_EICR_RX_QUEUE0 << i;
2343                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2344                         que->eims = tmp;
2345                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2346                             i, que->eims);
2347                         adapter->eims_mask |= que->eims;
2348                 }
2349
2350                 /* Link */
2351                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2352                     E1000_EIMS_OTHER);
2353                 adapter->link_mask |= E1000_EIMS_OTHER;
2354                 adapter->eims_mask |= adapter->link_mask;
2355         default:
2356                 break;
2357         }
2358
2359         /* Set the starting interrupt rate */
2360         if (hw->mac.type == e1000_82575)
2361                 newitr |= newitr << 16;
2362         else
2363                 newitr |= 0x80000000;   /* CNT_IGNR is bit 31 */
2364
2365         for (int i = 0; i < adapter->num_queues; i++) {
2366                 que = &adapter->queues[i];
2367                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2368         }
2369
2370         return;
2371 }
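
/*
 * IVAR packing on the 82576, as a worked example with assumed values.
 * Per the masks above, queues i and i + 8 share IVAR[i & 0x7]: the low
 * two bytes hold queue i's RX/TX vectors, the high two bytes hold
 * queue (i + 8)'s.  For queue 3 mapped to MSI-X vector 3:
 *
 *      IVAR[3] byte 0 (RX) = 3 | E1000_IVAR_VALID = 0x83
 *      IVAR[3] byte 1 (TX) = 3 | E1000_IVAR_VALID = 0x83
 *
 * E1000_IVAR_VALID (0x80) marks the byte as populated; without it the
 * hardware ignores the entry.
 */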
2372
2373
2374 static void
2375 igb_free_pci_resources(struct adapter *adapter)
2376 {
2377         struct          igb_queue *que = adapter->queues;
2378         device_t        dev = adapter->dev;
2379         int             rid;
2380
2381         /*
2382         ** There is a slight possibility of a failure mode
2383         ** in attach that will result in entering this function
2384         ** before interrupt resources have been initialized, and
2385         ** in that case we do not want to execute the loops below.
2386         ** We can detect this reliably from the state of the adapter's
2387         ** res pointer.
2388         */
2389         if (adapter->res == NULL)
2390                 goto mem;
2391
2392         /*
2393          * First release all the interrupt resources:
2394          */
2395         for (int i = 0; i < adapter->num_queues; i++, que++) {
2396                 rid = que->msix + 1;
2397                 if (que->tag != NULL) {
2398                         bus_teardown_intr(dev, que->res, que->tag);
2399                         que->tag = NULL;
2400                 }
2401                 if (que->res != NULL)
2402                         bus_release_resource(dev,
2403                             SYS_RES_IRQ, rid, que->res);
2404         }
2405
2406         /* Clean the Legacy or Link interrupt last */
2407         if (adapter->linkvec) /* we are doing MSIX */
2408                 rid = adapter->linkvec + 1;
2409         else
2410                 rid = (adapter->msix != 0) ? 1 : 0;
2411
2412         if (adapter->tag != NULL) {
2413                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2414                 adapter->tag = NULL;
2415         }
2416         if (adapter->res != NULL)
2417                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2418
2419 mem:
2420         if (adapter->msix)
2421                 pci_release_msi(dev);
2422
2423         if (adapter->msix_mem != NULL)
2424                 bus_release_resource(dev, SYS_RES_MEMORY,
2425                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2426
2427         if (adapter->pci_mem != NULL)
2428                 bus_release_resource(dev, SYS_RES_MEMORY,
2429                     PCIR_BAR(0), adapter->pci_mem);
2430
2431 }
2432
2433 /*
2434  * Setup Either MSI/X or MSI
2435  */
2436 static int
2437 igb_setup_msix(struct adapter *adapter)
2438 {
2439         device_t dev = adapter->dev;
2440         int rid, want, queues, msgs;
2441
2442         /* tuneable override */
2443         if (igb_enable_msix == 0)
2444                 goto msi;
2445
2446         /* First try MSI/X */
2447         rid = PCIR_BAR(IGB_MSIX_BAR);
2448         adapter->msix_mem = bus_alloc_resource_any(dev,
2449             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2450         if (!adapter->msix_mem) {
2451                 /* May not be enabled */
2452                 device_printf(adapter->dev,
2453                     "Unable to map MSIX table\n");
2454                 goto msi;
2455         }
2456
2457         msgs = pci_msix_count(dev); 
2458         if (msgs == 0) { /* system has msix disabled */
2459                 bus_release_resource(dev, SYS_RES_MEMORY,
2460                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2461                 adapter->msix_mem = NULL;
2462                 goto msi;
2463         }
2464
2465         /* Figure out a reasonable auto config value */
2466         queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2467
2468         /* Can have max of 4 queues on 82575 */
2469         if (adapter->hw.mac.type == e1000_82575) {
2470                 if (queues > 4)
2471                         queues = 4;
2472                 if (igb_num_queues > 4)
2473                         igb_num_queues = 4;
2474         }
2475
2476         if (igb_num_queues == 0)
2477                 igb_num_queues = queues;
2478
2479         /*
2480         ** One vector (RX/TX pair) per queue
2481         ** plus an additional for Link interrupt
2482         */
2483         want = igb_num_queues + 1;
2484         if (msgs >= want)
2485                 msgs = want;
2486         else {
2487                 device_printf(adapter->dev,
2488                     "MSIX Configuration Problem: "
2489                     "%d vectors available, but %d wanted!\n",
2490                     msgs, want);
2491                 return (ENXIO);
2492         }
2493         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2494                 device_printf(adapter->dev,
2495                     "Using MSIX interrupts with %d vectors\n", msgs);
2496                 adapter->num_queues = igb_num_queues;
2497                 return (msgs);
2498         }
2499 msi:
2500         msgs = pci_msi_count(dev);
2501         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2502                 device_printf(adapter->dev, "Using MSI interrupt\n");
2503         return (msgs);
2504 }
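
/*
 * Vector accounting in igb_setup_msix(), as a worked example with
 * assumed values.  On an 8-CPU machine whose device exposes 10 MSI-X
 * messages, with igb_num_queues left at 0:
 *
 *      queues = min(ncpus, msgs - 1) = min(8, 9) = 8;
 *      igb_num_queues = 8;             // auto-configured
 *      want = 8 + 1 = 9;               // one per queue, plus link
 *      msgs = 9;                       // trimmed down to what we want
 *
 * pci_alloc_msix() is then asked for 9 vectors.  On an 82575 the queue
 * count is first clamped to 4, which would give want = 5.
 */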
2505
2506 /*********************************************************************
2507  *
2508  *  Set up a fresh starting state
2509  *
2510  **********************************************************************/
2511 static void
2512 igb_reset(struct adapter *adapter)
2513 {
2514         device_t        dev = adapter->dev;
2515         struct e1000_hw *hw = &adapter->hw;
2516         struct e1000_fc_info *fc = &hw->fc;
2517         struct ifnet    *ifp = adapter->ifp;
2518         u32             pba = 0;
2519         u16             hwm;
2520
2521         INIT_DEBUGOUT("igb_reset: begin");
2522
2523         /* Let the firmware know the OS is in control */
2524         igb_get_hw_control(adapter);
2525
2526         /*
2527          * Packet Buffer Allocation (PBA)
2528          * Writing PBA sets the receive portion of the buffer
2529          * the remainder is used for the transmit buffer.
2530          */
2531         switch (hw->mac.type) {
2532         case e1000_82575:
2533                 pba = E1000_PBA_32K;
2534                 break;
2535         case e1000_82576:
2536                 pba = E1000_PBA_64K;
2537                 break;
2538         case e1000_82580:
2539                 pba = E1000_PBA_35K;
2540         default:
2541                 break;
2542         }
2543
2544         /* Special needs in case of Jumbo frames */
2545         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2546                 u32 tx_space, min_tx, min_rx;
2547                 pba = E1000_READ_REG(hw, E1000_PBA);
2548                 tx_space = pba >> 16;
2549                 pba &= 0xffff;
2550                 min_tx = (adapter->max_frame_size +
2551                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2552                 min_tx = roundup2(min_tx, 1024);
2553                 min_tx >>= 10;
2554                 min_rx = adapter->max_frame_size;
2555                 min_rx = roundup2(min_rx, 1024);
2556                 min_rx >>= 10;
2557                 if (tx_space < min_tx &&
2558                     ((min_tx - tx_space) < pba)) {
2559                         pba = pba - (min_tx - tx_space);
2560                         /*
2561                          * if short on rx space, rx wins
2562                          * and must trump tx adjustment
2563                          */
2564                         if (pba < min_rx)
2565                                 pba = min_rx;
2566                 }
2567                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2568         }
2569
2570         INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2571
2572         /*
2573          * These parameters control the automatic generation (Tx) and
2574          * response (Rx) to Ethernet PAUSE frames.
2575          * - High water mark should allow for at least two frames to be
2576          *   received after sending an XOFF.
2577          * - Low water mark works best when it is very near the high water mark.
2578          *   This allows the receiver to restart by sending XON when it has
2579          *   drained a bit.
2580          */
2581         hwm = min(((pba << 10) * 9 / 10),
2582             ((pba << 10) - 2 * adapter->max_frame_size));
2583
2584         if (hw->mac.type < e1000_82576) {
2585                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2586                 fc->low_water = fc->high_water - 8;
2587         } else {
2588                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2589                 fc->low_water = fc->high_water - 16;
2590         }
2591
2592         fc->pause_time = IGB_FC_PAUSE_TIME;
2593         fc->send_xon = TRUE;
2594
2595         /* Set Flow control, use the tunable value if sane */
2596         if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2597                 fc->requested_mode = igb_fc_setting;
2598         else
2599                 fc->requested_mode = e1000_fc_none;
2600
2601         fc->current_mode = fc->requested_mode;
2602
2603         /* Issue a global reset */
2604         e1000_reset_hw(hw);
2605         E1000_WRITE_REG(hw, E1000_WUC, 0);
2606
2607         if (e1000_init_hw(hw) < 0)
2608                 device_printf(dev, "Hardware Initialization Failed\n");
2609
2610         if (hw->mac.type == e1000_82580) {
2611                 u32 reg;
2612
2613                 hwm = (pba << 10) - (2 * adapter->max_frame_size);
2614                 /*
2615                  * 0x80000000 - enable DMA COAL
2616                  * 0x10000000 - use L0s as low power
2617                  * 0x20000000 - use L1 as low power
2618                  * X << 16 - exit dma coal when rx data exceeds X kB
2619                  * Y - upper limit to stay in dma coal in units of 32usecs
2620                  */
2621                 E1000_WRITE_REG(hw, E1000_DMACR,
2622                     0xA0000006 | ((hwm << 6) & 0x00FF0000));
2623
2624                 /* set hwm to PBA -  2 * max frame size */
2625                 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2626                 /*
2627                  * This sets the time to wait before requesting transition to
2628                  * low power state to number of usecs needed to receive 1 512
2629                  * byte frame at gigabit line rate
2630                  */
2631                 E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2632
2633                 /* free space in tx packet buffer to wake from DMA coal */
2634                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2635                     (20480 - (2 * adapter->max_frame_size)) >> 6);
2636
2637                 /* make low power state decision controlled by DMA coal */
2638                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2639                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2640                     reg | E1000_PCIEMISC_LX_DECISION);
2641         }
2642
2643         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2644         e1000_get_phy_info(hw);
2645         e1000_check_for_link(hw);
2646         return;
2647 }
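
/*
 * Flow-control watermark arithmetic, as a worked example with assumed
 * values.  For an 82576 with pba = E1000_PBA_64K (64 1KB blocks) and a
 * 1518-byte maximum frame:
 *
 *      pba << 10                   = 65536 bytes of RX packet buffer
 *      (pba << 10) * 9 / 10        = 58982
 *      (pba << 10) - 2 * maxframe  = 65536 - 3036 = 62500
 *      hwm = min(58982, 62500)     = 58982
 *      high_water = 58982 & 0xFFF0 = 58976 (16-byte granularity)
 *      low_water  = 58976 - 16     = 58960
 *
 * The 65536 - 58976 = 6560 bytes of headroom comfortably hold the two
 * full frames that may still arrive after XOFF is sent.
 */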
2648
2649 /*********************************************************************
2650  *
2651  *  Setup networking device structure and register an interface.
2652  *
2653  **********************************************************************/
2654 static void
2655 igb_setup_interface(device_t dev, struct adapter *adapter)
2656 {
2657         struct ifnet   *ifp;
2658
2659         INIT_DEBUGOUT("igb_setup_interface: begin");
2660
2661         ifp = adapter->ifp = &adapter->arpcom.ac_if;
2662         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2663         ifp->if_mtu = ETHERMTU;
2664         ifp->if_init =  igb_init;
2665         ifp->if_softc = adapter;
2666         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2667         ifp->if_ioctl = igb_ioctl;
2668         ifp->if_start = igb_start;
2669 #ifdef DEVICE_POLLING
2670         ifp->if_poll = igb_poll;
2671 #endif
2672 #if __FreeBSD_version >= 800000
2673         ifp->if_transmit = igb_mq_start;
2674         ifp->if_qflush = igb_qflush;
2675 #endif
2676         ifq_set_maxlen(&ifp->if_snd, adapter->num_tx_desc - 1);
2677         ifq_set_ready(&ifp->if_snd);
2678
2679         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2680
2681         ifp->if_capabilities = ifp->if_capenable = 0;
2682
2683         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2684 #ifdef NET_TSO
2685         ifp->if_capabilities |= IFCAP_TSO4;
2686 #endif
2687         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2688 #ifdef NET_LRO
2689         if (igb_header_split)
2690                 ifp->if_capabilities |= IFCAP_LRO;
2691 #endif
2692
2693         ifp->if_capenable = ifp->if_capabilities;
2694
2695         /*
2696          * Tell the upper layer(s) we support long frames.
2697          */
2698         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2699         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2700         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2701
2702         /*
2703          * Specify the media types supported by this adapter and register
2704          * callbacks to update media and link information
2705          */
2706         ifmedia_init(&adapter->media, IFM_IMASK,
2707             igb_media_change, igb_media_status);
2708         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2709             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2710                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2711                             0, NULL);
2712                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2713         } else {
2714                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2715                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2716                             0, NULL);
2717                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2718                             0, NULL);
2719                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2720                             0, NULL);
2721                 if (adapter->hw.phy.type != e1000_phy_ife) {
2722                         ifmedia_add(&adapter->media,
2723                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2724                         ifmedia_add(&adapter->media,
2725                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2726                 }
2727         }
2728         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2729         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2730 }
2731
2732
2733 /*
2734  * Manage DMA'able memory.
2735  */
2736 static void
2737 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2738 {
2739         if (error)
2740                 return;
2741         *(bus_addr_t *) arg = segs[0].ds_addr;
2742 }
2743
2744 static int
2745 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2746         struct igb_dma_alloc *dma, int mapflags)
2747 {
2748         int error;
2749
2750         error = bus_dma_tag_create(NULL,                /* parent */
2751                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2752                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2753                                 BUS_SPACE_MAXADDR,      /* highaddr */
2754                                 NULL, NULL,             /* filter, filterarg */
2755                                 size,                   /* maxsize */
2756                                 1,                      /* nsegments */
2757                                 size,                   /* maxsegsize */
2758                                 0,                      /* flags */
2759                                 &dma->dma_tag);
2760         if (error) {
2761                 device_printf(adapter->dev,
2762                     "%s: bus_dma_tag_create failed: %d\n",
2763                     __func__, error);
2764                 goto fail_0;
2765         }
2766
2767         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2768             BUS_DMA_NOWAIT, &dma->dma_map);
2769         if (error) {
2770                 device_printf(adapter->dev,
2771                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2772                     __func__, (uintmax_t)size, error);
2773                 goto fail_1;    /* nothing was allocated yet */
2774         }
2775
2776         dma->dma_paddr = 0;
2777         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2778             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2779         if (error || dma->dma_paddr == 0) {
2780                 device_printf(adapter->dev,
2781                     "%s: bus_dmamap_load failed: %d\n",
2782                     __func__, error);
2783                 goto fail_3;
2784         }
2785
2786         return (0);
2787
2788 fail_3:
2789         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2790         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2791 fail_1:
2792         bus_dma_tag_destroy(dma->dma_tag);
2793 fail_0:
2794         dma->dma_map = NULL;
2795         dma->dma_tag = NULL;
2796
2797         return (error);
2798 }
2799
2800 static void
2801 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2802 {
2803         if (dma->dma_tag == NULL)
2804                 return;
2805         if (dma->dma_map != NULL) {
2806                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2807                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2808                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2809                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2810                 dma->dma_map = NULL;
2811         }
2812         bus_dma_tag_destroy(dma->dma_tag);
2813         dma->dma_tag = NULL;
2814 }
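
/*
 * Typical lifecycle of an igb_dma_alloc area, as an illustrative
 * sketch with a caller-owned struct igb_dma_alloc dma:
 *
 *      if (igb_dma_malloc(adapter, size, &dma, BUS_DMA_NOWAIT) != 0)
 *              return (ENOMEM);
 *      // dma.dma_vaddr: kernel virtual address for CPU access
 *      // dma.dma_paddr: bus address to hand to the hardware
 *      ...
 *      igb_dma_free(adapter, &dma);    // sync, unload, free, destroy
 *
 * igb_dmamap_cb() only captures segs[0] because the tag is created
 * with nsegments = 1, so the region is physically contiguous from the
 * device's point of view.
 */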
2815
2816
2817 /*********************************************************************
2818  *
2819  *  Allocate memory for the transmit and receive rings, and then
2820  *  the descriptors associated with each, called only once at attach.
2821  *
2822  **********************************************************************/
2823 static int
2824 igb_allocate_queues(struct adapter *adapter)
2825 {
2826         device_t dev = adapter->dev;
2827         struct igb_queue        *que = NULL;
2828         struct tx_ring          *txr = NULL;
2829         struct rx_ring          *rxr = NULL;
2830         int rsize, tsize, error = E1000_SUCCESS;
2831         int txconf = 0, rxconf = 0;
2832
2833         /* First allocate the top level queue structs */
2834         if (!(adapter->queues =
2835             (struct igb_queue *) kmalloc(sizeof(struct igb_queue) *
2836             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2837                 device_printf(dev, "Unable to allocate queue memory\n");
2838                 error = ENOMEM;
2839                 goto fail;
2840         }
2841
2842         /* Next allocate the TX ring struct memory */
2843         if (!(adapter->tx_rings =
2844             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2845             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2846                 device_printf(dev, "Unable to allocate TX ring memory\n");
2847                 error = ENOMEM;
2848                 goto tx_fail;
2849         }
2850
2851         /* Now allocate the RX */
2852         if (!(adapter->rx_rings =
2853             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2854             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2855                 device_printf(dev, "Unable to allocate RX ring memory\n");
2856                 error = ENOMEM;
2857                 goto rx_fail;
2858         }
2859
2860         tsize = roundup2(adapter->num_tx_desc *
2861             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2862         /*
2863          * Now set up the TX queues, txconf is needed to handle the
2864          * possibility that things fail midcourse and we need to
2865          * undo memory gracefully
2866          */ 
2867         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2868                 /* Set up some basics */
2869                 txr = &adapter->tx_rings[i];
2870                 txr->adapter = adapter;
2871                 txr->me = i;
2872
2873                 /* Initialize the TX lock */
2874                 ksnprintf(txr->spin_name, sizeof(txr->spin_name), "%s:tx(%d)",
2875                     device_get_nameunit(dev), txr->me);
2876
2877                 IGB_TX_LOCK_INIT(txr);
2878
2879                 if (igb_dma_malloc(adapter, tsize,
2880                         &txr->txdma, BUS_DMA_NOWAIT)) {
2881                         device_printf(dev,
2882                             "Unable to allocate TX Descriptor memory\n");
2883                         error = ENOMEM;
2884                         goto err_tx_desc;
2885                 }
2886                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2887                 bzero((void *)txr->tx_base, tsize);
2888
2889                 /* Now allocate transmit buffers for the ring */
2890                 if (igb_allocate_transmit_buffers(txr)) {
2891                         device_printf(dev,
2892                             "Critical Failure setting up transmit buffers\n");
2893                         error = ENOMEM;
2894                         goto err_tx_desc;
2895                 }
2896 #if __FreeBSD_version >= 800000
2897                 /* Allocate a buf ring */
2898                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2899                     M_WAITOK, &txr->tx_mtx);
2900 #endif
2901         }
2902
2903         /*
2904          * Next the RX queues...
2905          */ 
2906         rsize = roundup2(adapter->num_rx_desc *
2907             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2908         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2909                 rxr = &adapter->rx_rings[i];
2910                 rxr->adapter = adapter;
2911                 rxr->me = i;
2912
2913                 /* Initialize the RX lock */
2914                 ksnprintf(rxr->spin_name, sizeof(rxr->spin_name), "%s:rx(%d)",
2915                     device_get_nameunit(dev), rxr->me);
2916
2917                 IGB_RX_LOCK_INIT(rxr);
2918
2919                 if (igb_dma_malloc(adapter, rsize,
2920                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2921                         device_printf(dev,
2922                             "Unable to allocate RX Descriptor memory\n");
2923                         error = ENOMEM;
2924                         goto err_rx_desc;
2925                 }
2926                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2927                 bzero((void *)rxr->rx_base, rsize);
2928
2929                 /* Allocate receive buffers for the ring*/
2930                 if (igb_allocate_receive_buffers(rxr)) {
2931                         device_printf(dev,
2932                             "Critical Failure setting up receive buffers\n");
2933                         error = ENOMEM;
2934                         goto err_rx_desc;
2935                 }
2936         }
2937
2938         /*
2939         ** Finally set up the queue holding structs
2940         */
2941         for (int i = 0; i < adapter->num_queues; i++) {
2942                 que = &adapter->queues[i];
2943                 que->adapter = adapter;
2944                 que->txr = &adapter->tx_rings[i];
2945                 que->rxr = &adapter->rx_rings[i];
2946         }
2947
2948         return (0);
2949
2950 err_rx_desc:
2951         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2952                 igb_dma_free(adapter, &rxr->rxdma);
2953 err_tx_desc:
2954         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2955                 igb_dma_free(adapter, &txr->txdma);
2956         kfree(adapter->rx_rings, M_DEVBUF);
2957 rx_fail:
2958 #if __FreeBSD_version >= 800000
2959         buf_ring_free(txr->br, M_DEVBUF);
2960 #endif
2961         kfree(adapter->tx_rings, M_DEVBUF);
2962 tx_fail:
2963         kfree(adapter->queues, M_DEVBUF);
2964 fail:
2965         return (error);
2966 }
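/*
 * The txconf/rxconf counters above implement the usual counted-unwind
 * idiom for a partial allocation failure.  A minimal stand-alone
 * sketch of the pattern (illustrative only, not driver code;
 * alloc_one() and free_one() are hypothetical):
 */
#if 0
static int
alloc_array(int n)
{
        int i, conf = 0;

        for (i = 0; i < n; i++, conf++) {
                if (alloc_one(i) != 0)
                        goto unwind;
        }
        return (0);
unwind:
        /* conf counts the fully allocated entries; release just those. */
        for (i = 0; conf > 0; i++, conf--)
                free_one(i);
        return (ENOMEM);
}
#endif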
2967
2968 /*********************************************************************
2969  *
2970  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2971  *  the information needed to transmit a packet on the wire. This is
2972  *  called only once at attach, setup is done every reset.
2973  *
2974  **********************************************************************/
2975 static int
2976 igb_allocate_transmit_buffers(struct tx_ring *txr)
2977 {
2978         struct adapter *adapter = txr->adapter;
2979         device_t dev = adapter->dev;
2980         struct igb_tx_buffer *txbuf;
2981         int error, i;
2982
2983         /*
2984          * Setup DMA descriptor areas.
2985          */
2986         if ((error = bus_dma_tag_create(NULL,
2987                                1, 0,                    /* alignment, bounds */
2988                                BUS_SPACE_MAXADDR,       /* lowaddr */
2989                                BUS_SPACE_MAXADDR,       /* highaddr */
2990                                NULL, NULL,              /* filter, filterarg */
2991                                IGB_TSO_SIZE,            /* maxsize */
2992                                IGB_MAX_SCATTER,         /* nsegments */
2993                                PAGE_SIZE,               /* maxsegsize */
2994                                0,                       /* flags */
2995                                &txr->txtag))) {
2996                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2997                 goto fail;
2998         }
2999
3000         if (!(txr->tx_buffers =
3001             (struct igb_tx_buffer *) kmalloc(sizeof(struct igb_tx_buffer) *
3002             adapter->num_tx_desc, M_DEVBUF, M_INTWAIT | M_ZERO))) {
3003                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3004                 error = ENOMEM;
3005                 goto fail;
3006         }
3007
3008         /* Create the descriptor buffer dma maps */
3009         txbuf = txr->tx_buffers;
3010         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3011                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3012                 if (error != 0) {
3013                         device_printf(dev, "Unable to create TX DMA map\n");
3014                         goto fail;
3015                 }
3016         }
3017
3018         return 0;
3019 fail:
3020         /* We free everything; this handles a failure partway through */
3021         igb_free_transmit_structures(adapter);
3022         return (error);
3023 }
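/*
 * Each per-descriptor map created above follows the standard bus_dma
 * lifecycle once a frame is queued and later reclaimed (cf. igb_txeof()).
 * A sketch of that lifecycle, using only calls that appear elsewhere in
 * this file (m_head stands in for the outgoing mbuf chain):
 */
#if 0
        bus_dma_segment_t segs[IGB_MAX_SCATTER];
        int error, nsegs;

        /* attach the mbuf chain to this descriptor's map */
        error = bus_dmamap_load_mbuf_segment(txr->txtag, txbuf->map,
            m_head, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
        /* ... descriptors written, hardware completes the DMA ... */
        bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_POSTWRITE);
        bus_dmamap_unload(txr->txtag, txbuf->map);
#endif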
3024
3025 /*********************************************************************
3026  *
3027  *  Initialize a transmit ring.
3028  *
3029  **********************************************************************/
3030 static void
3031 igb_setup_transmit_ring(struct tx_ring *txr)
3032 {
3033         struct adapter *adapter = txr->adapter;
3034         struct igb_tx_buffer *txbuf;
3035         int i;
3036
3037         /* Clear the old descriptor contents */
3038         IGB_TX_LOCK(txr);
3039         bzero((void *)txr->tx_base,
3040               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3041         /* Reset indices */
3042         txr->next_avail_desc = 0;
3043         txr->next_to_clean = 0;
3044
3045         /* Free any existing tx buffers. */
3046         txbuf = txr->tx_buffers;
3047         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3048                 if (txbuf->m_head != NULL) {
3049                         bus_dmamap_sync(txr->txtag, txbuf->map,
3050                             BUS_DMASYNC_POSTWRITE);
3051                         bus_dmamap_unload(txr->txtag, txbuf->map);
3052                         m_freem(txbuf->m_head);
3053                         txbuf->m_head = NULL;
3054                 }
3055                 /* clear the watch index */
3056                 txbuf->next_eop = -1;
3057         }
3058
3059         /* Set number of descriptors available */
3060         txr->tx_avail = adapter->num_tx_desc;
3061
3062         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3063             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3064         IGB_TX_UNLOCK(txr);
3065 }
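/*
 * A note on the sync discipline used throughout this file: the PRE
 * sync above is issued before the hardware may touch the ring, and a
 * matching POST sync is issued after it has (see igb_txeof()).  The
 * pairing, in sketch form (tag and map are placeholders):
 */
#if 0
        bus_dmamap_sync(tag, map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
        /* ... the device reads/writes the memory via DMA ... */
        bus_dmamap_sync(tag, map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
#endif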
3066
3067 /*********************************************************************
3068  *
3069  *  Initialize all transmit rings.
3070  *
3071  **********************************************************************/
3072 static void
3073 igb_setup_transmit_structures(struct adapter *adapter)
3074 {
3075         struct tx_ring *txr = adapter->tx_rings;
3076
3077         for (int i = 0; i < adapter->num_queues; i++, txr++)
3078                 igb_setup_transmit_ring(txr);
3079
3080         return;
3081 }
3082
3083 /*********************************************************************
3084  *
3085  *  Enable transmit unit.
3086  *
3087  **********************************************************************/
3088 static void
3089 igb_initialize_transmit_units(struct adapter *adapter)
3090 {
3091         struct tx_ring  *txr = adapter->tx_rings;
3092         struct e1000_hw *hw = &adapter->hw;
3093         u32             tctl, txdctl;
3094
3095         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3096
3097         /* Setup the Tx Descriptor Rings */
3098         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3099                 u64 bus_addr = txr->txdma.dma_paddr;
3100
3101                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3102                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3103                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3104                     (uint32_t)(bus_addr >> 32));
3105                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3106                     (uint32_t)bus_addr);
3107
3108                 /* Setup the HW Tx Head and Tail descriptor pointers */
3109                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3110                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3111
3112                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3113                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3114                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3115
3116                 txr->watchdog_check = FALSE;
3117
3118                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3119                 txdctl |= IGB_TX_PTHRESH;
3120                 txdctl |= IGB_TX_HTHRESH << 8;
3121                 txdctl |= IGB_TX_WTHRESH << 16;
3122                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3123                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3124         }
3125
3126         /* Program the Transmit Control Register */
3127         tctl = E1000_READ_REG(hw, E1000_TCTL);
3128         tctl &= ~E1000_TCTL_CT;
3129         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3130                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3131
3132         e1000_config_collision_dist(hw);
3133
3134         /* This write will effectively turn on the transmit unit. */
3135         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3136 }
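/*
 * TXDCTL packs the three per-queue thresholds into one register, as
 * the shifts above show: the prefetch threshold in the low byte, the
 * host threshold at bit 8 and the write-back threshold at bit 16.
 * A worked example with illustrative values (not the driver's tuning):
 */
#if 0
        txdctl = 8 | (1 << 8) | (16 << 16);     /* == 0x00100108 */
        txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
#endif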
3137
3138 /*********************************************************************
3139  *
3140  *  Free all transmit rings.
3141  *
3142  **********************************************************************/
3143 static void
3144 igb_free_transmit_structures(struct adapter *adapter)
3145 {
3146         struct tx_ring *txr = adapter->tx_rings;
3147
3148         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3149                 IGB_TX_LOCK(txr);
3150                 igb_free_transmit_buffers(txr);
3151                 igb_dma_free(adapter, &txr->txdma);
3152                 IGB_TX_UNLOCK(txr);
3153                 IGB_TX_LOCK_DESTROY(txr);
3154         }
3155         kfree(adapter->tx_rings, M_DEVBUF);
3156 }
3157
3158 /*********************************************************************
3159  *
3160  *  Free transmit ring related data structures.
3161  *
3162  **********************************************************************/
3163 static void
3164 igb_free_transmit_buffers(struct tx_ring *txr)
3165 {
3166         struct adapter *adapter = txr->adapter;
3167         struct igb_tx_buffer *tx_buffer;
3168         int             i;
3169
3170         INIT_DEBUGOUT("free_transmit_ring: begin");
3171
3172         if (txr->tx_buffers == NULL)
3173                 return;
3174
3175         tx_buffer = txr->tx_buffers;
3176         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3177                 if (tx_buffer->m_head != NULL) {
3178                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3179                             BUS_DMASYNC_POSTWRITE);
3180                         bus_dmamap_unload(txr->txtag,
3181                             tx_buffer->map);
3182                         m_freem(tx_buffer->m_head);
3183                         tx_buffer->m_head = NULL;
3184                         if (tx_buffer->map != NULL) {
3185                                 bus_dmamap_destroy(txr->txtag,
3186                                     tx_buffer->map);
3187                                 tx_buffer->map = NULL;
3188                         }
3189                 } else if (tx_buffer->map != NULL) {
3190                         bus_dmamap_unload(txr->txtag,
3191                             tx_buffer->map);
3192                         bus_dmamap_destroy(txr->txtag,
3193                             tx_buffer->map);
3194                         tx_buffer->map = NULL;
3195                 }
3196         }
3197 #if __FreeBSD_version >= 800000
3198         if (txr->br != NULL)
3199                 buf_ring_free(txr->br, M_DEVBUF);
3200 #endif
3201         if (txr->tx_buffers != NULL) {
3202                 kfree(txr->tx_buffers, M_DEVBUF);
3203                 txr->tx_buffers = NULL;
3204         }
3205         if (txr->txtag != NULL) {
3206                 bus_dma_tag_destroy(txr->txtag);
3207                 txr->txtag = NULL;
3208         }
3209         return;
3210 }
3211
3212 /**********************************************************************
3213  *
3214  *  Setup work for hardware segmentation offload (TSO)
3215  *
3216  **********************************************************************/
3217 #ifdef NET_TSO 
3218 static boolean_t
3219 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3220 {
3221         struct adapter *adapter = txr->adapter;
3222         struct e1000_adv_tx_context_desc *TXD;
3223         struct igb_tx_buffer        *tx_buffer;
3224         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3225         u32 mss_l4len_idx = 0;
3226         u16 vtag = 0;
3227         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3228         struct ether_vlan_header *eh;
3229         struct ip *ip;
3230         struct tcphdr *th;
3231
3232
3233         /*
3234          * Determine where frame payload starts.
3235          * Jump over vlan headers if already present
3236          */
3237         eh = mtod(mp, struct ether_vlan_header *);
3238         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3239                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3240         else
3241                 ehdrlen = ETHER_HDR_LEN;
3242
3243         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3244         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3245                 return FALSE;
3246
3247         /* Only supports IPV4 for now */
3248         ctxd = txr->next_avail_desc;
3249         tx_buffer = &txr->tx_buffers[ctxd];
3250         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3251
3252         ip = (struct ip *)(mp->m_data + ehdrlen);
3253         if (ip->ip_p != IPPROTO_TCP)
3254                 return FALSE;
3255         ip->ip_sum = 0;
3256         ip_hlen = ip->ip_hl << 2;
3257         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3258         th->th_sum = in_pseudo(ip->ip_src.s_addr,
3259             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3260         tcp_hlen = th->th_off << 2;
3261         /*
3262          * Calculate header length, this is used
3263          * in the transmit desc in igb_xmit
3264          */
3265         *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3266
3267         /* VLAN MACLEN IPLEN */
3268         if (mp->m_flags & M_VLANTAG) {
3269                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3270                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3271         }
3272
3273         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3274         vlan_macip_lens |= ip_hlen;
3275         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3276
3277         /* ADV DTYPE TUCMD */
3278         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3279         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3280         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3281         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3282
3283         /* MSS L4LEN IDX */
3284         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3285         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3286         /* 82575 needs the queue index added */
3287         if (adapter->hw.mac.type == e1000_82575)
3288                 mss_l4len_idx |= txr->me << 4;
3289         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3290
3291         TXD->seqnum_seed = htole32(0);
3292         tx_buffer->m_head = NULL;
3293         tx_buffer->next_eop = -1;
3294
3295         if (++ctxd == adapter->num_tx_desc)
3296                 ctxd = 0;
3297
3298         txr->tx_avail--;
3299         txr->next_avail_desc = ctxd;
3300         return TRUE;
3301 }
3302 #endif
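/*
 * A worked example of the header math in igb_tso_setup() for an
 * untagged IPv4/TCP frame with no IP or TCP options:
 *
 *      ehdrlen = ETHER_HDR_LEN = 14, ip_hlen = 20, tcp_hlen = 20
 *      *hdrlen = 14 + 20 + 20 = 54
 *      vlan_macip_lens = (14 << E1000_ADVTXD_MACLEN_SHIFT) | 20
 *      mss_l4len_idx   = (tso_segsz << E1000_ADVTXD_MSS_SHIFT) |
 *                        (20 << E1000_ADVTXD_L4LEN_SHIFT)
 *
 * The 54-byte header length is then consumed by igb_xmit() when it
 * fills in the transmit descriptor, per the comment above.
 */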
3303
3304 /*********************************************************************
3305  *
3306  *  Context Descriptor setup for VLAN or CSUM
3307  *
3308  **********************************************************************/
3309
3310 static bool
3311 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3312 {
3313         struct adapter *adapter = txr->adapter;
3314         struct e1000_adv_tx_context_desc *TXD;
3315         struct igb_tx_buffer        *tx_buffer;
3316         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3317         struct ether_vlan_header *eh;
3318         struct ip *ip = NULL;
3319         struct ip6_hdr *ip6;
3320         int  ehdrlen, ctxd, ip_hlen = 0;
3321         u16     etype, vtag = 0;
3322         u8      ipproto = 0;
3323         bool    offload = TRUE;
3324
3325         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3326                 offload = FALSE;
3327
3328         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3329         ctxd = txr->next_avail_desc;
3330         tx_buffer = &txr->tx_buffers[ctxd];
3331         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3332
3333         /*
3334         ** In advanced descriptors the vlan tag must
3335         ** be placed into the context descriptor, so we
3336         ** need one even when no checksum offload is done.
3337         */
3338         if (mp->m_flags & M_VLANTAG) {
3339                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3340                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3341         } else if (offload == FALSE)
3342                 return FALSE;
3343
3344         /*
3345          * Determine where frame payload starts.
3346          * Jump over vlan headers if already present,
3347          * helpful for QinQ too.
3348          */
3349         eh = mtod(mp, struct ether_vlan_header *);
3350         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3351                 etype = ntohs(eh->evl_proto);
3352                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3353         } else {
3354                 etype = ntohs(eh->evl_encap_proto);
3355                 ehdrlen = ETHER_HDR_LEN;
3356         }
3357
3358         /* Set the ether header length */
3359         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3360
3361         switch (etype) {
3362                 case ETHERTYPE_IP:
3363                         ip = (struct ip *)(mp->m_data + ehdrlen);
3364                         ip_hlen = ip->ip_hl << 2;
3365                         if (mp->m_len < ehdrlen + ip_hlen) {
3366                                 offload = FALSE;
3367                                 break;
3368                         }
3369                         ipproto = ip->ip_p;
3370                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3371                         break;
3372                 case ETHERTYPE_IPV6:
3373                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3374                         ip_hlen = sizeof(struct ip6_hdr);
3375                         if (mp->m_len < ehdrlen + ip_hlen)
3376                                 return (FALSE);
3377                         ipproto = ip6->ip6_nxt;
3378                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3379                         break;
3380                 default:
3381                         offload = FALSE;
3382                         break;
3383         }
3384
3385         vlan_macip_lens |= ip_hlen;
3386         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3387
3388         switch (ipproto) {
3389                 case IPPROTO_TCP:
3390                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3391                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3392                         break;
3393                 case IPPROTO_UDP:
3394                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3395                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3396                         break;
3397 #if __FreeBSD_version >= 800000
3398                 case IPPROTO_SCTP:
3399                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3400                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3401                         break;
3402 #endif
3403                 default:
3404                         offload = FALSE;
3405                         break;
3406         }
3407
3408         /* 82575 needs the queue index added */
3409         if (adapter->hw.mac.type == e1000_82575)
3410                 mss_l4len_idx = txr->me << 4;
3411
3412         /* Now copy bits into descriptor */
3413         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3414         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3415         TXD->seqnum_seed = htole32(0);
3416         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3417
3418         tx_buffer->m_head = NULL;
3419         tx_buffer->next_eop = -1;
3420
3421         /* We've consumed the first desc, adjust counters */
3422         if (++ctxd == adapter->num_tx_desc)
3423                 ctxd = 0;
3424         txr->next_avail_desc = ctxd;
3425         --txr->tx_avail;
3426
3427         return (offload);
3428 }
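/*
 * Putting the pieces of igb_tx_ctx_setup() together: an untagged IPv4
 * frame with CSUM_TCP set ends up with
 *
 *      vlan_macip_lens = (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT) | ip_hlen
 *      type_tucmd_mlhl = E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT |
 *                        E1000_ADVTXD_TUCMD_IPV4 | E1000_ADVTXD_TUCMD_L4T_TCP
 *
 * and consumes exactly one descriptor from the ring.
 */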
3429
3430
3431 /**********************************************************************
3432  *
3433  *  Examine each tx_buffer in the used queue. If the hardware is done
3434  *  processing the packet then free associated resources. The
3435  *  tx_buffer is put back on the free queue.
3436  *
3437  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3438  **********************************************************************/
3439 static bool
3440 igb_txeof(struct tx_ring *txr)
3441 {
3442         struct adapter  *adapter = txr->adapter;
3443         int first, last, done;
3444         struct igb_tx_buffer *tx_buffer;
3445         struct e1000_tx_desc   *tx_desc, *eop_desc;
3446         struct ifnet   *ifp = adapter->ifp;
3447
3448         IGB_TX_LOCK_ASSERT(txr);
3449
3450         if (txr->tx_avail == adapter->num_tx_desc)
3451                 return FALSE;
3452
3453         first = txr->next_to_clean;
3454         tx_desc = &txr->tx_base[first];
3455         tx_buffer = &txr->tx_buffers[first];
3456         last = tx_buffer->next_eop;
3457         eop_desc = &txr->tx_base[last];
3458
3459         /*
3460          * What this does is get the index of the
3461          * first descriptor AFTER the EOP of the 
3462          * first packet, that way we can do the
3463          * simple comparison on the inner while loop.
3464          */
3465         if (++last == adapter->num_tx_desc)
3466                 last = 0;
3467         done = last;
3468
3469         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3470             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3471
3472         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3473                 /* We clean the range of the packet */
3474                 while (first != done) {
3475                         tx_desc->upper.data = 0;
3476                         tx_desc->lower.data = 0;
3477                         tx_desc->buffer_addr = 0;
3478                         ++txr->tx_avail;
3479
3480                         if (tx_buffer->m_head) {
3481                                 txr->bytes +=
3482                                     tx_buffer->m_head->m_pkthdr.len;
3483                                 bus_dmamap_sync(txr->txtag,
3484                                     tx_buffer->map,
3485                                     BUS_DMASYNC_POSTWRITE);
3486                                 bus_dmamap_unload(txr->txtag,
3487                                     tx_buffer->map);
3488
3489                                 m_freem(tx_buffer->m_head);
3490                                 tx_buffer->m_head = NULL;
3491                         }
3492                         tx_buffer->next_eop = -1;
3493                         txr->watchdog_time = ticks;
3494
3495                         if (++first == adapter->num_tx_desc)
3496                                 first = 0;
3497
3498                         tx_buffer = &txr->tx_buffers[first];
3499                         tx_desc = &txr->tx_base[first];
3500                 }
3501                 ++txr->packets;
3502                 ++ifp->if_opackets;
3503                 /* See if we can continue to the next packet */
3504                 last = tx_buffer->next_eop;
3505                 if (last != -1) {
3506                         eop_desc = &txr->tx_base[last];
3507                         /* Get new done point */
3508                         if (++last == adapter->num_tx_desc) last = 0;
3509                         done = last;
3510                 } else
3511                         break;
3512         }
3513         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3514             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3515
3516         txr->next_to_clean = first;
3517
3518         /*
3519          * If we have enough room, clear IFF_OACTIVE
3520          * to tell the stack that it is OK to send packets.
3521          */
3522         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3523                 ifp->if_flags &= ~IFF_OACTIVE;
3524                 /* All clean, turn off the watchdog */
3525                 if (txr->tx_avail == adapter->num_tx_desc) {
3526                         txr->watchdog_check = FALSE;
3527                         return FALSE;
3528                 }
3529         }
3530
3531         return (TRUE);
3532 }
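/*
 * A worked example of the index walk in igb_txeof() with
 * num_tx_desc = 8: suppose next_to_clean (first) = 2 and the packet's
 * next_eop (last) = 5.  Then done = 6 (one slot past the EOP, with
 * wrap-around), so the inner loop reclaims descriptors 2, 3, 4 and 5
 * before the next packet's EOP status bit is examined.
 */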
3533
3534
3535 /*********************************************************************
3536  *
3537  *  Setup descriptor buffer(s) from system mbuf buffer pools.
3538  *              i - designates the ring index
3539  *              clean - tells the function whether to update
3540  *                      the header, the packet buffer, or both.
3541  *
3542  **********************************************************************/
3543 static int
3544 igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3545 {
3546         struct adapter          *adapter = rxr->adapter;
3547         struct igb_rx_buf       *rxbuf;
3548         struct mbuf             *mh, *mp;
3549         bus_dma_segment_t       hseg[1];
3550         bus_dma_segment_t       pseg[1];
3551         bus_dmamap_t            map;
3552         int                     nsegs, error;
3553         int                     mbflags;
3554
3555         /*
3556          * Init-time loads are allowed to use a blocking mbuf
3557          * allocation; the sheer number of mbufs allocated at once
3558          * could otherwise lead to spurious failures.
3559          */
3560         mbflags = (clean & IGB_CLEAN_INITIAL) ? MB_WAIT : MB_DONTWAIT;
3561
3562         rxbuf = &rxr->rx_buffers[i];
3563         mh = mp = NULL;
3564         if ((clean & IGB_CLEAN_HEADER) != 0) {
3565                 mh = m_gethdr(mbflags, MT_DATA);
3566                 if (mh == NULL) {
3567                         adapter->mbuf_header_failed++;          
3568                         return (ENOBUFS);
3569                 }
3570                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3571                 /*
3572                  * Because IGB_HDR_BUF size is less than MHLEN
3573                  * and we configure controller to split headers
3574                  * we can align mbuf on ETHER_ALIGN boundary.
3575                  */
3576                 m_adj(mh, ETHER_ALIGN);
3577                 error = bus_dmamap_load_mbuf_segment(rxr->rx_htag,
3578                     rxr->rx_hspare_map, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3579                 if (error != 0) {
3580                         m_freem(mh);
3581                         return (error);
3582                 }
3583                 mh->m_flags &= ~M_PKTHDR;
3584         }
3585         if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3586                 mp = m_getl(adapter->rx_mbuf_sz, mbflags, MT_DATA,
3587                             M_PKTHDR, NULL);
3588 #if 0
3589                 mp = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR,
3590                     adapter->rx_mbuf_sz);
3591 #endif
3592                 if (mp == NULL) {
3593                         if (mh != NULL) {
3594                                 adapter->mbuf_packet_failed++;          
3595                                 bus_dmamap_unload(rxr->rx_htag,
3596                                     rxbuf->head_map);
3597                                 mh->m_flags |= M_PKTHDR;
3598                                 m_freem(mh);
3599                         }
3600                         return (ENOBUFS);
3601                 }
3602                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3603                 error = bus_dmamap_load_mbuf_segment(rxr->rx_ptag,
3604                     rxr->rx_pspare_map, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3605                 if (error != 0) {
3606                         if (mh != NULL) {
3607                                 bus_dmamap_unload(rxr->rx_htag,
3608                                     rxbuf->head_map);
3609                                 mh->m_flags |= M_PKTHDR;
3610                                 m_freem(mh);
3611                         }
3612                         m_freem(mp);
3613                         return (error);
3614                 }
3615                 mp->m_flags &= ~M_PKTHDR;
3616         }
3617
3618         /* Loading new DMA maps complete, unload maps for received buffers. */
3619         if ((clean & IGB_CLEAN_HEADER) != 0 && rxbuf->m_head != NULL) {
3620                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3621                     BUS_DMASYNC_POSTREAD);
3622                 bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3623         }
3624         if ((clean & IGB_CLEAN_PAYLOAD) != 0 && rxbuf->m_pack != NULL) {
3625                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3626                     BUS_DMASYNC_POSTREAD);
3627                 bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3628         }
3629
3630         /* Reflect loaded dmamaps. */
3631         if ((clean & IGB_CLEAN_HEADER) != 0) {
3632                 map = rxbuf->head_map;
3633                 rxbuf->head_map = rxr->rx_hspare_map;
3634                 rxr->rx_hspare_map = map;
3635                 rxbuf->m_head = mh;
3636                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3637                     BUS_DMASYNC_PREREAD);
3638                 rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
3639         }
3640         if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3641                 map = rxbuf->pack_map;
3642                 rxbuf->pack_map = rxr->rx_pspare_map;
3643                 rxr->rx_pspare_map = map;
3644                 rxbuf->m_pack = mp;
3645                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3646                     BUS_DMASYNC_PREREAD);
3647                 rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
3648         }
3649
3650         return (0);
3651 }
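/*
 * The spare-map exchange above lets a refill fail without disturbing
 * the buffer already posted in the ring slot: the new mbuf is loaded
 * into the ring's single spare map first, and the maps are swapped
 * only once that load has succeeded.  The idiom in sketch form
 * (tag, m, spare_map and map are illustrative names):
 */
#if 0
        bus_dma_segment_t seg[1];
        bus_dmamap_t tmp;
        int error, nsegs;

        error = bus_dmamap_load_mbuf_segment(tag, rxr->spare_map, m,
            seg, 1, &nsegs, BUS_DMA_NOWAIT);
        if (error == 0) {
                tmp = rxbuf->map;       /* commit: swap the maps */
                rxbuf->map = rxr->spare_map;
                rxr->spare_map = tmp;
        }
#endif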
3652
3653 /*********************************************************************
3654  *
3655  *  rx_buffer per received packet, the maximum number of rx_buffers
3656  *  rx_buffer per received packet, the maximum number of rx_buffer's
3657  *  that we'll need is equal to the number of receive descriptors
3658  *  that we've allocated.
3659  *
3660  **********************************************************************/
3661 static int
3662 igb_allocate_receive_buffers(struct rx_ring *rxr)
3663 {
3664         struct  adapter         *adapter = rxr->adapter;
3665         device_t                dev = adapter->dev;
3666         struct igb_rx_buf       *rxbuf;
3667         int                     i, bsize, error;
3668
3669         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3670         if (!(rxr->rx_buffers =
3671             (struct igb_rx_buf *) kmalloc(bsize,
3672             M_DEVBUF, M_INTWAIT | M_ZERO))) {
3673                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3674                 error = ENOMEM;
3675                 goto fail;
3676         }
3677
3678         if ((error = bus_dma_tag_create(NULL,
3679                                    1, 0,                /* alignment, bounds */
3680                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3681                                    BUS_SPACE_MAXADDR,   /* highaddr */
3682                                    NULL, NULL,          /* filter, filterarg */
3683                                    MSIZE,               /* maxsize */
3684                                    1,                   /* nsegments */
3685                                    MSIZE,               /* maxsegsize */
3686                                    0,                   /* flags */
3687                                    &rxr->rx_htag))) {
3688                 device_printf(dev, "Unable to create RX DMA tag\n");
3689                 goto fail;
3690         }
3691
3692         if ((error = bus_dma_tag_create(NULL,
3693                                    1, 0,                /* alignment, bounds */
3694                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3695                                    BUS_SPACE_MAXADDR,   /* highaddr */
3696                                    NULL, NULL,          /* filter, filterarg */
3697                                    MJUMPAGESIZE,        /* maxsize */
3698                                    1,                   /* nsegments */
3699                                    MJUMPAGESIZE,        /* maxsegsize */
3700                                    0,                   /* flags */
3701                                    &rxr->rx_ptag))) {
3702                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3703                 goto fail;
3704         }
3705
3706         /* Create the spare maps (used by getbuf) */
3707         error = bus_dmamap_create(rxr->rx_htag, BUS_DMA_NOWAIT,
3708              &rxr->rx_hspare_map);
3709         if (error) {
3710                 device_printf(dev,
3711                     "%s: bus_dmamap_create header spare failed: %d\n",
3712                     __func__, error);
3713                 goto fail;
3714         }
3715         error = bus_dmamap_create(rxr->rx_ptag, BUS_DMA_NOWAIT,
3716              &rxr->rx_pspare_map);
3717         if (error) {
3718                 device_printf(dev,
3719                     "%s: bus_dmamap_create packet spare failed: %d\n",
3720                     __func__, error);
3721                 goto fail;
3722         }
3723
3724         for (i = 0; i < adapter->num_rx_desc; i++) {
3725                 rxbuf = &rxr->rx_buffers[i];
3726                 error = bus_dmamap_create(rxr->rx_htag,
3727                     BUS_DMA_NOWAIT, &rxbuf->head_map);
3728                 if (error) {
3729                         device_printf(dev,
3730                             "Unable to create RX head DMA maps\n");
3731                         goto fail;
3732                 }
3733                 error = bus_dmamap_create(rxr->rx_ptag,
3734                     BUS_DMA_NOWAIT, &rxbuf->pack_map);
3735                 if (error) {
3736                         device_printf(dev,
3737                             "Unable to create RX packet DMA maps\n");
3738                         goto fail;
3739                 }
3740         }
3741
3742         return (0);
3743
3744 fail:
3745         /* Frees all, but can handle partial completion */
3746         igb_free_receive_structures(adapter);
3747         return (error);
3748 }
3749
3750
3751 static void
3752 igb_free_receive_ring(struct rx_ring *rxr)
3753 {
3754         struct  adapter         *adapter;
3755         struct igb_rx_buf       *rxbuf;
3756         int i;
3757
3758         adapter = rxr->adapter;
3759         for (i = 0; i < adapter->num_rx_desc; i++) {
3760                 rxbuf = &rxr->rx_buffers[i];
3761                 if (rxbuf->m_head != NULL) {
3762                         bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3763                             BUS_DMASYNC_POSTREAD);
3764                         bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3765                         rxbuf->m_head->m_flags |= M_PKTHDR;
3766                         m_freem(rxbuf->m_head);
3767                 }
3768                 if (rxbuf->m_pack != NULL) {
3769                         bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3770                             BUS_DMASYNC_POSTREAD);
3771                         bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3772                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3773                         m_freem(rxbuf->m_pack);
3774                 }
3775                 rxbuf->m_head = NULL;
3776                 rxbuf->m_pack = NULL;
3777         }
3778 }
3779
3780
3781 /*********************************************************************
3782  *
3783  *  Initialize a receive ring and its buffers.
3784  *
3785  **********************************************************************/
3786 static int
3787 igb_setup_receive_ring(struct rx_ring *rxr)
3788 {
3789         struct  adapter         *adapter;
3790         struct  ifnet           *ifp;
3791         device_t                dev;
3792 #ifdef NET_LRO 
3793         struct lro_ctrl         *lro = &rxr->lro;
3794 #endif
3795         int                     j, rsize, error = 0;
3796
3797         adapter = rxr->adapter;
3798         dev = adapter->dev;
3799         ifp = adapter->ifp;
3800
3801         /* Clear the ring contents */
3802         IGB_RX_LOCK(rxr);
3803         rsize = roundup2(adapter->num_rx_desc *
3804             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3805         bzero((void *)rxr->rx_base, rsize);
3806
3807         /*
3808         ** Free current RX buffer structures and their mbufs
3809         */
3810         igb_free_receive_ring(rxr);
3811
3812         /* Now replenish the ring mbufs */
3813         for (j = 0; j < adapter->num_rx_desc; j++) {
3814                 error = igb_get_buf(rxr, j, IGB_CLEAN_BOTH | IGB_CLEAN_INITIAL);
3815                 if (error)
3816                         goto fail;
3817         }
3818
3819         /* Setup our descriptor indices */
3820         rxr->next_to_check = 0;
3821         rxr->last_cleaned = 0;
3822         rxr->lro_enabled = FALSE;
3823
3824         if (igb_header_split)
3825                 rxr->hdr_split = TRUE;
3826 #ifdef NET_LRO
3827         else
3828                 ifp->if_capabilities &= ~IFCAP_LRO;
3829 #endif
3830
3831         rxr->fmp = NULL;
3832         rxr->lmp = NULL;
3833         rxr->discard = FALSE;
3834
3835         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3836             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3837
3838         /*
3839         ** Now set up the LRO interface; we
3840         ** also only do header split when LRO
3841         ** is enabled, since it is often
3842         ** undesirable in other setups.
3843         */
3844 #ifdef NET_LRO
3845         if (ifp->if_capenable & IFCAP_LRO) {
3846                 int err = tcp_lro_init(lro);
3847                 if (err) {
3848                         device_printf(dev, "LRO Initialization failed!\n");
3849                         goto fail;
3850                 }
3851                 INIT_DEBUGOUT("RX LRO Initialized\n");
3852                 rxr->lro_enabled = TRUE;
3853                 lro->ifp = adapter->ifp;
3854         }
3855 #endif
3856
3857         IGB_RX_UNLOCK(rxr);
3858         return (0);
3859
3860 fail:
3861         igb_free_receive_ring(rxr);
3862         IGB_RX_UNLOCK(rxr);
3863         return (error);
3864 }
3865
3866 /*********************************************************************
3867  *
3868  *  Initialize all receive rings.
3869  *
3870  **********************************************************************/
3871 static int
3872 igb_setup_receive_structures(struct adapter *adapter)
3873 {
3874         struct rx_ring *rxr = adapter->rx_rings;
3875         int i;
3876
3877         for (i = 0; i < adapter->num_queues; i++, rxr++)
3878                 if (igb_setup_receive_ring(rxr))
3879                         goto fail;
3880
3881         return (0);
3882 fail:
3883         /*
3884          * Free RX buffers allocated so far; we only handle the
3885          * rings that completed, since the failing ring cleaned up
3886          * after itself. The value of 'i' will be the failed ring,
3887          * so we must pre-decrement it.
3888          */
3889         rxr = adapter->rx_rings;
3890         for (--i; i >= 0; i--, rxr++)
3891                 igb_free_receive_ring(rxr);
3894
3895         return (ENOBUFS);
3896 }
3897
3898 /*********************************************************************
3899  *
3900  *  Enable receive unit.
3901  *
3902  **********************************************************************/
3903 static void
3904 igb_initialize_receive_units(struct adapter *adapter)
3905 {
3906         struct rx_ring  *rxr = adapter->rx_rings;
3907         struct ifnet    *ifp = adapter->ifp;
3908         struct e1000_hw *hw = &adapter->hw;
3909         u32             rctl, rxcsum, psize, srrctl = 0;
3910
3911         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3912
3913         /*
3914          * Make sure receives are disabled while setting
3915          * up the descriptor ring
3916          */
3917         rctl = E1000_READ_REG(hw, E1000_RCTL);
3918         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3919
3920         /*
3921         ** Set up for header split
3922         */
3923         if (rxr->hdr_split) {
3924                 /* Use a standard mbuf for the header */
3925                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3926                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3927         } else
3928                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3929
3930         /*
3931         ** Set up for jumbo frames
3932         */
3933         if (ifp->if_mtu > ETHERMTU) {
3934                 rctl |= E1000_RCTL_LPE;
3935                 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3936                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3937
3938                 /* Set maximum packet len */
3939                 psize = adapter->max_frame_size;
3940                 /* are we on a vlan? */
3941                 if (adapter->ifp->if_vlantrunks != NULL)
3942                         psize += VLAN_TAG_SIZE;
3943                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3944         } else {
3945                 rctl &= ~E1000_RCTL_LPE;
3946                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3947                 rctl |= E1000_RCTL_SZ_2048;
3948         }
3949
3950         /* Setup the Base and Length of the Rx Descriptor Rings */
3951         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3952                 u64 bus_addr = rxr->rxdma.dma_paddr;
3953                 u32 rxdctl;
3954
3955                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3956                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3957                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3958                     (uint32_t)(bus_addr >> 32));
3959                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3960                     (uint32_t)bus_addr);
3961                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3962                 /* Enable this Queue */
3963                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3964                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3965                 rxdctl &= 0xFFF00000;
3966                 rxdctl |= IGB_RX_PTHRESH;
3967                 rxdctl |= IGB_RX_HTHRESH << 8;
3968                 rxdctl |= IGB_RX_WTHRESH << 16;
3969                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3970         }
3971
3972         /*
3973         ** Setup for RX MultiQueue
3974         */
3975         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3976         if (adapter->num_queues > 1) {
3977                 u32 random[10], mrqc, shift = 0;
3978                 union igb_reta {
3979                         u32 dword;
3980                         u8  bytes[4];
3981                 } reta;
3982
3983                 karc4rand(&random, sizeof(random));
3984                 if (adapter->hw.mac.type == e1000_82575)
3985                         shift = 6;
3986                 /* Warning: RETA magic follows */
3987                 for (int i = 0; i < 128; i++) {
3988                         reta.bytes[i & 3] =
3989                             (i % adapter->num_queues) << shift;
3990                         if ((i & 3) == 3)
3991                                 E1000_WRITE_REG(hw,
3992                                     E1000_RETA(i >> 2), reta.dword);
3993                 }
3994                 /* Now fill in hash table */
3995                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3996                 for (int i = 0; i < 10; i++)
3997                         E1000_WRITE_REG_ARRAY(hw,
3998                             E1000_RSSRK(0), i, random[i]);
3999
4000                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4001                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4002                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4003                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4004                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4005                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4006                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4007                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4008
4009                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4010
4011                 /*
4012                 ** NOTE: Receive Full-Packet Checksum Offload
4013                 ** is mutually exclusive with Multiqueue. However,
4014                 ** this is not the same as the TCP/IP checksum
4015                 ** offloads, which still work.
4016                 */
4017                 rxcsum |= E1000_RXCSUM_PCSD;
4018 #if __FreeBSD_version >= 800000
4019                 /* For SCTP Offload */
4020                 if ((hw->mac.type == e1000_82576)
4021                     && (ifp->if_capenable & IFCAP_RXCSUM))
4022                         rxcsum |= E1000_RXCSUM_CRCOFL;
4023 #endif
4024         } else {
4025                 /* Non RSS setup */
4026                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4027                         rxcsum |= E1000_RXCSUM_IPPCSE;
4028 #if __FreeBSD_version >= 800000
4029                         if (adapter->hw.mac.type == e1000_82576)
4030                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4031 #endif
4032                 } else
4033                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4034         }
4035         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4036
4037         /* Setup the Receive Control Register */
4038         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4039         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4040                    E1000_RCTL_RDMTS_HALF |
4041                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4042         /* Strip CRC bytes. */
4043         rctl |= E1000_RCTL_SECRC;
4044         /* Make sure VLAN Filters are off */
4045         rctl &= ~E1000_RCTL_VFE;
4046         /* Don't store bad packets */
4047         rctl &= ~E1000_RCTL_SBP;
4048
4049         /* Enable Receives */
4050         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4051
4052         /*
4053          * Setup the HW Rx Head and Tail Descriptor Pointers
4054          *   - needs to be after enable
4055          */
4056         for (int i = 0; i < adapter->num_queues; i++) {
4057                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4058                 E1000_WRITE_REG(hw, E1000_RDT(i),
4059                      adapter->num_rx_desc - 1);
4060         }
4061         return;
4062 }
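/*
 * A worked example of the RETA fill above: with num_queues = 2 and
 * shift = 0 the 128 byte-wide entries alternate 0, 1, 0, 1, ... and
 * are flushed one dword (four entries) at a time, so on a
 * little-endian machine every E1000_RETA(n) register is written as
 * 0x01000100.  On the 82575 (shift = 6) the same pattern becomes
 * 0x00/0x40 per byte, i.e. 0x40004000 per dword.
 */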
4063
4064 /*********************************************************************
4065  *
4066  *  Free receive rings.
4067  *
4068  **********************************************************************/
4069 static void
4070 igb_free_receive_structures(struct adapter *adapter)
4071 {
4072         struct rx_ring *rxr = adapter->rx_rings;
4073
4074         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4075 #ifdef NET_LRO 
4076                 struct lro_ctrl *lro = &rxr->lro;
4077 #endif
4078                 IGB_RX_LOCK(rxr);
4079                 igb_free_receive_buffers(rxr);
4080 #ifdef NET_LRO
4081                 tcp_lro_free(lro);
4082 #endif
4083                 igb_dma_free(adapter, &rxr->rxdma);
4084                 IGB_RX_UNLOCK(rxr);
4085                 IGB_RX_LOCK_DESTROY(rxr);
4086         }
4087
4088         kfree(adapter->rx_rings, M_DEVBUF);
4089 }
4090
4091 /*********************************************************************
4092  *
4093  *  Free receive ring data structures.
4094  *
4095  **********************************************************************/
4096 static void
4097 igb_free_receive_buffers(struct rx_ring *rxr)
4098 {
4099         struct adapter          *adapter = rxr->adapter;
4100         struct igb_rx_buf       *rxbuf;
4101         int i;
4102
4103         INIT_DEBUGOUT("free_receive_structures: begin");
4104
4105         if (rxr->rx_hspare_map != NULL) {
4106                 bus_dmamap_destroy(rxr->rx_htag, rxr->rx_hspare_map);
4107                 rxr->rx_hspare_map = NULL;
4108         }
4109
4110         if (rxr->rx_pspare_map != NULL) {
4111                 bus_dmamap_destroy(rxr->rx_ptag, rxr->rx_pspare_map);
4112                 rxr->rx_pspare_map = NULL;
4113         }
4114
4115         /* Cleanup any existing buffers */
4116         if (rxr->rx_buffers != NULL) {
4117                 for (i = 0; i < adapter->num_rx_desc; i++) {
4118                         rxbuf = &rxr->rx_buffers[i];
4119                         if (rxbuf->m_head != NULL) {
4120                                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
4121                                     BUS_DMASYNC_POSTREAD);
4122                                 bus_dmamap_unload(rxr->rx_htag,
4123                                     rxbuf->head_map);
4124                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4125                                 m_freem(rxbuf->m_head);
4126                         }
4127                         if (rxbuf->m_pack != NULL) {
4128                                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
4129                                     BUS_DMASYNC_POSTREAD);
4130                                 bus_dmamap_unload(rxr->rx_ptag,
4131                                     rxbuf->pack_map);
4132                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4133                                 m_freem(rxbuf->m_pack);
4134                         }
4135                         rxbuf->m_head = NULL;
4136                         rxbuf->m_pack = NULL;
4137                         if (rxbuf->head_map != NULL) {
4138                                 bus_dmamap_destroy(rxr->rx_htag,
4139                                     rxbuf->head_map);
4140                                 rxbuf->head_map = NULL;
4141                         }
4142                         if (rxbuf->pack_map != NULL) {
4143                                 bus_dmamap_destroy(rxr->rx_ptag,
4144                                     rxbuf->pack_map);
4145                                 rxbuf->pack_map = NULL;
4146                         }
4147                 }
4148                 if (rxr->rx_buffers != NULL) {
4149                         kfree(rxr->rx_buffers, M_DEVBUF);
4150                         rxr->rx_buffers = NULL;
4151                 }
4152         }
4153
4154         if (rxr->rx_htag != NULL) {
4155                 bus_dma_tag_destroy(rxr->rx_htag);
4156                 rxr->rx_htag = NULL;
4157         }
4158         if (rxr->rx_ptag != NULL) {
4159                 bus_dma_tag_destroy(rxr->rx_ptag);
4160                 rxr->rx_ptag = NULL;
4161         }
4162 }
4163
4164 static __inline void
4165 igb_rx_discard(struct rx_ring *rxr, union e1000_adv_rx_desc *cur, int i)
4166 {
4167
4168         if (rxr->fmp != NULL) {
4169                 rxr->fmp->m_flags |= M_PKTHDR;
4170                 m_freem(rxr->fmp);
4171                 rxr->fmp = NULL;
4172                 rxr->lmp = NULL;
4173         }
4174 }
4175
4176 static __inline void
4177 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4178 {
4179
4180         /*
4181          * At the moment LRO is only for IPv4/TCP packets whose TCP
4182          * checksum has been verified by hardware, and which carry no
4183          * VLAN tag in the ethernet header.
4184          */
4185 #ifdef NET_LRO
4186         if (rxr->lro_enabled &&
4187             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4188             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4189             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4190             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4191             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4192             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4193                 /*
4194                  * Send to the stack if:
4195                  *  - LRO not enabled, or
4196                  *  - no LRO resources, or
4197                  *  - lro enqueue fails
4198                  */
4199                 if (rxr->lro.lro_cnt != 0)
4200                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4201                                 return;
4202         }
4203 #endif
4204         (*ifp->if_input)(ifp, m);
4205 }
4206
4207 /*********************************************************************
4208  *
4209  *  This routine executes in interrupt context. It replenishes
4210  *  the mbufs in the descriptor and sends data which has been
4211  *  dma'ed into host memory to upper layer.
4212  *
4213  *  We loop at most count times (count must be > 0), stopping early
4214  *  once there are no more completed descriptors.
4215  *
4216  *  Return TRUE if more to clean, FALSE otherwise
4217  *********************************************************************/
4218 static bool
4219 igb_rxeof(struct rx_ring *rxr, int count)
4220 {
4221         struct adapter          *adapter = rxr->adapter;
4222         struct ifnet            *ifp = adapter->ifp;
4223 #ifdef NET_LRO
4224         struct lro_ctrl         *lro = &rxr->lro;
4225         struct lro_entry        *queued;
4226 #endif
4227         int                     i, prog = 0;
4228         u32                     ptype, staterr = 0;
4229         union e1000_adv_rx_desc *cur;
4230
4231         IGB_RX_LOCK(rxr);
4232
4233         /* Main clean loop */
4234         for (i = rxr->next_to_check; count > 0; prog++) {
4235                 struct mbuf *sendmp, *mh, *mp;
4236                 u16 hlen, plen, hdr, vtag;
4237                 bool eop = FALSE;
4238                 u8 dopayload;
4239  
4240                 /* Sync the ring. */
4241                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4242                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4243                 cur = &rxr->rx_base[i];
4244                 staterr = le32toh(cur->wb.upper.status_error);
4245                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4246                         break;
4247                 if ((ifp->if_flags & IFF_RUNNING) == 0)
4248                         break;
4249                 count--;
4250                 sendmp = mh = mp = NULL;
4251                 cur->wb.upper.status_error = 0;
4252                 plen = le16toh(cur->wb.upper.length);
4253                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4254                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4255                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4256
4257                 /* Make sure all segments of a bad packet are discarded */
4258                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4259                     (rxr->discard)) {
4260                         ifp->if_ierrors++;
4261                         ++rxr->rx_discarded;
4262                         if (!eop) /* Catch subsequent segs */
4263                                 rxr->discard = TRUE;
4264                         else
4265                                 rxr->discard = FALSE;
4266                         igb_rx_discard(rxr, cur, i);
4267                         goto next_desc;
4268                 }
4269
4270                 /*
4271                 ** The way the hardware is configured to
4272                 ** split, it will ONLY use the header buffer
4273                 ** when header split is enabled, otherwise we
4274                 ** get normal behavior, i.e., both header and
4275                 ** payload are DMA'd into the payload buffer.
4276                 **
4277                 ** The fmp test is to catch the case where a
4278                 ** packet spans multiple descriptors, in that
4279                 ** case only the first header is valid.
4280                 */
4281                 if (rxr->hdr_split && rxr->fmp == NULL) {
4282                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4283                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4284                         if (hlen > IGB_HDR_BUF)
4285                                 hlen = IGB_HDR_BUF;
4286                         /* Handle the header mbuf */
4287                         mh = rxr->rx_buffers[i].m_head;
4288                         mh->m_len = hlen;
4289                         dopayload = IGB_CLEAN_HEADER;
4290                         /*
4291                         ** Get the payload length; this
4292                         ** could be zero if it's a small
4293                         ** packet.
4294                         */
4295                         if (plen > 0) {
4296                                 mp = rxr->rx_buffers[i].m_pack;
4297                                 mp->m_len = plen;
4298                                 mh->m_next = mp;
4299                                 dopayload = IGB_CLEAN_BOTH;
4300                                 rxr->rx_split_packets++;
4301                         }
4302                 } else {
4303                         /*
4304                         ** Either no header split, or a
4305                         ** secondary piece of a fragmented
4306                         ** split packet.
4307                         */
4308                         mh = rxr->rx_buffers[i].m_pack;
4309                         mh->m_len = plen;
4310                         dopayload = IGB_CLEAN_PAYLOAD;
4311                 }
4312
4313                 /*
4314                 ** get_buf will overwrite the writeback
4315                 ** descriptor so save the VLAN tag now.
4316                 */
4317                 vtag = le16toh(cur->wb.upper.vlan);
4318                 if (igb_get_buf(rxr, i, dopayload) != 0) {
4319                         ifp->if_iqdrops++;
4320                         /*
4321                          * We've dropped a frame due to lack of resources
4322                          * so we should drop entire multi-segmented
4323                          * frames until we encounter EOP.
4324                          */
4325                         if ((staterr & E1000_RXD_STAT_EOP) == 0)
4326                                 rxr->discard = TRUE;
4327                         igb_rx_discard(rxr, cur, i);
4328                         goto next_desc;
4329                 }
4330
4331                 /* Initial frame - setup */
4332                 if (rxr->fmp == NULL) {
4333                         mh->m_pkthdr.len = mh->m_len;
4334                         /* Store the first mbuf */
4335                         rxr->fmp = mh;
4336                         rxr->lmp = mh;
4337                         if (mp != NULL) {
4338                                 /* Add payload if split */
4339                                 mh->m_pkthdr.len += mp->m_len;
4340                                 rxr->lmp = mh->m_next;
4341                         }
4342                 } else {
4343                         /* Chain mbuf's together */
4344                         rxr->lmp->m_next = mh;
4345                         rxr->lmp = rxr->lmp->m_next;
4346                         rxr->fmp->m_pkthdr.len += mh->m_len;
4347                 }
4348
4349                 if (eop) {
4350                         rxr->fmp->m_pkthdr.rcvif = ifp;
4351                         ifp->if_ipackets++;
4352                         rxr->rx_packets++;
4353                         /* capture data for AIM */
4354                         rxr->packets++;
4355                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4356                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4357
4358                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4359                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4360                         /* XXX igb(4) always strips VLAN. */
4361                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4362                             (staterr & E1000_RXD_STAT_VP) != 0) {
4363                                 rxr->fmp->m_pkthdr.ether_vlantag = vtag;
4364                                 rxr->fmp->m_flags |= M_VLANTAG;
4365                         }
4366 #if __FreeBSD_version >= 800000
4367                         rxr->fmp->m_pkthdr.flowid = curcpu;
4368                         rxr->fmp->m_flags |= M_FLOWID;
4369 #endif
4370                         sendmp = rxr->fmp;
4371                         /* Make sure to set M_PKTHDR. */
4372                         sendmp->m_flags |= M_PKTHDR;
4373                         rxr->fmp = NULL;
4374                         rxr->lmp = NULL;
4375                 }
4376
4377 next_desc:
4378                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4379                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4380
4381                 rxr->last_cleaned = i; /* For updating tail */
4382
4383                 /* Advance our pointers to the next descriptor. */
4384                 if (++i == adapter->num_rx_desc)
4385                         i = 0;
4386  
4387                 /*
4388                 ** Note that we hold the RX lock through
4389                 ** the following call, so this ring's
4390                 ** next_to_check cannot change underneath us.
4391                 */
4392                 if (sendmp != NULL)
4393                         igb_rx_input(rxr, ifp, sendmp, ptype);
4394         }
4395
4396         if (prog == 0) {
4397                 IGB_RX_UNLOCK(rxr);
4398                 return (FALSE);
4399         }
4400
4401         rxr->next_to_check = i;
4402
4403         /* Advance the E1000's Receive Queue "Tail Pointer". */
4404         E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4405
4406         /*
4407          * Flush any outstanding LRO work
4408          */
4409 #ifdef NET_LRO
4410         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4411                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4412                 tcp_lro_flush(lro, queued);
4413         }
4414 #endif
4415
4416         IGB_RX_UNLOCK(rxr);
4417
4418         /*
4419         ** If the descriptor we stopped at already has DD set,
4420         ** there is more cleaning to do, so return TRUE.
4421         */
4422         if ((staterr & E1000_RXD_STAT_DD) != 0)
4423                 return (TRUE);
4424
4425         return (FALSE);
4426 }
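
/*
 * A hedged sketch (under #if 0, not compiled): the ring bookkeeping of
 * igb_rxeof() boiled down to its skeleton. descriptor_done() and
 * process_rx_desc() are hypothetical stand-ins for the DD test and the
 * clean-loop body above.
 */
#if 0
        i = rxr->next_to_check;
        while (count-- > 0 && descriptor_done(rxr, i)) {
                process_rx_desc(rxr, i);        /* may hand a frame to if_input */
                rxr->last_cleaned = i;          /* remembered for the RDT write */
                if (++i == adapter->num_rx_desc)
                        i = 0;                  /* the ring wraps to slot 0 */
        }
        rxr->next_to_check = i;
        /* Hardware may now reuse every slot up to and including last_cleaned. */
        E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
#endif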
4427
4428 /*********************************************************************
4429  *
4430  *  Verify that the hardware indicated that the checksum is valid.
4431  *  Inform the stack about the status of the checksum so that the
4432  *  stack doesn't spend time verifying the checksum itself.
4433  *
4434  *********************************************************************/
4435 static void
4436 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4437 {
4438         u16 status = (u16)staterr;
4439         u8  errors = (u8) (staterr >> 24);
4440         int sctp;
4441
4442         /* The Ignore Checksum (IXSM) bit is set */
4443         if (status & E1000_RXD_STAT_IXSM) {
4444                 mp->m_pkthdr.csum_flags = 0;
4445                 return;
4446         }
4447
4448         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4449             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4450                 sctp = 1;
4451         else
4452                 sctp = 0;
4453         if (status & E1000_RXD_STAT_IPCS) {
4454                 /* Did it pass? */
4455                 if (!(errors & E1000_RXD_ERR_IPE)) {
4456                         /* IP Checksum Good */
4457                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4458                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4459                 } else
4460                         mp->m_pkthdr.csum_flags = 0;
4461         }
4462
4463         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4464                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4465 #if __FreeBSD_version >= 800000
4466                 if (sctp) /* reassign */
4467                         type = CSUM_SCTP_VALID;
4468 #endif
4469                 /* Did it pass? */
4470                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4471                         mp->m_pkthdr.csum_flags |= type;
4472                         if (sctp == 0)
4473                                 mp->m_pkthdr.csum_data = htons(0xffff);
4474                 }
4475         }
4476         return;
4477 }
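
/*
 * A hedged sketch (under #if 0, not compiled): how the write-back
 * status_error word splits into the two fields igb_rx_checksum() works
 * on. Consult e1000_defines.h for the individual bit values.
 */
#if 0
        u32 staterr = le32toh(cur->wb.upper.status_error);
        u16 status  = (u16)staterr;        /* bits 0-15: status (DD, EOP, IXSM, ...) */
        u8  errors  = (u8)(staterr >> 24); /* bits 24-31: errors (IPE, TCPE, ...) */
#endif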
4478
4479 /*
4480  * This routine is run via a vlan
4481  * config EVENT.
4482  */
4483 static void
4484 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4485 {
4486         struct adapter  *adapter = ifp->if_softc;
4487         u32             index, bit;
4488
4489         if (ifp->if_softc !=  arg)   /* Not our event */
4490                 return;
4491
4492         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4493                 return;
4494
4495         index = (vtag >> 5) & 0x7F;
4496         bit = vtag & 0x1F;
4497         igb_shadow_vfta[index] |= (1 << bit);
4498         ++adapter->num_vlans;
4499         /* Re-init to load the changes */
4500         igb_init(adapter);
4501 }
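
/*
 * Worked example of the shadow VFTA math above: the 4096 possible VLAN
 * ids map onto 128 32-bit words, so vtag 1000 lands in word
 * (1000 >> 5) & 0x7F = 31 at bit 1000 & 0x1F = 8 (31 * 32 + 8 = 1000).
 * A hedged sketch, under #if 0 and not compiled:
 */
#if 0
        u16 vtag  = 1000;
        u32 index = (vtag >> 5) & 0x7F;         /* word 31 of the table */
        u32 bit   = vtag & 0x1F;                /* bit 8 within that word */
        igb_shadow_vfta[index] |= (1 << bit);
#endif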
4502
4503 /*
4504  * This routine is run via a vlan
4505  * unconfig EVENT.
4506  */
4507 static void
4508 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4509 {
4510         struct adapter  *adapter = ifp->if_softc;
4511         u32             index, bit;
4512
4513         if (ifp->if_softc !=  arg)
4514                 return;
4515
4516         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4517                 return;
4518
4519         index = (vtag >> 5) & 0x7F;
4520         bit = vtag & 0x1F;
4521         igb_shadow_vfta[index] &= ~(1 << bit);
4522         --adapter->num_vlans;
4523         /* Re-init to load the changes */
4524         igb_init(adapter);
4525 }
4526
4527 static void
4528 igb_setup_vlan_hw_support(struct adapter *adapter)
4529 {
4530         struct e1000_hw *hw = &adapter->hw;
4531         u32             reg;
4532
4533         /*
4534         ** We get here through init_locked, meaning
4535         ** a soft reset, which has already cleared
4536         ** the VFTA and other state; if no VLANs
4537         ** have been registered, do nothing.
4538         */
4539         if (adapter->num_vlans == 0)
4540                 return;
4541
4542         /*
4543         ** A soft reset zeroes out the VFTA, so
4544         ** we need to repopulate it now.
4545         */
4546         for (int i = 0; i < IGB_VFTA_SIZE; i++)
4547                 if (igb_shadow_vfta[i] != 0)
4548                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4549                             i, igb_shadow_vfta[i]);
4550
4551         reg = E1000_READ_REG(hw, E1000_CTRL);
4552         reg |= E1000_CTRL_VME;
4553         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4554
4555         /* Enable the Filter Table */
4556         reg = E1000_READ_REG(hw, E1000_RCTL);
4557         reg &= ~E1000_RCTL_CFIEN;
4558         reg |= E1000_RCTL_VFE;
4559         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4560
4561         /* Update the frame size */
4562         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4563             adapter->max_frame_size + VLAN_TAG_SIZE);
4564 }
4565
4566 static void
4567 igb_enable_intr(struct adapter *adapter)
4568 {
4569         /* With RSS set up what to auto clear */
4570         if (adapter->msix_mem) {
4571                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4572                     adapter->eims_mask);
4573                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4574                     adapter->eims_mask);
4575                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4576                     adapter->eims_mask);
4577                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4578                     E1000_IMS_LSC);
4579         } else {
4580                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4581                     IMS_ENABLE_MASK);
4582         }
4583         E1000_WRITE_FLUSH(&adapter->hw);
4584
4585         return;
4586 }
4587
4588 static void
4589 igb_disable_intr(struct adapter *adapter)
4590 {
4591         if (adapter->msix_mem) {
4592                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4593                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4594         } 
4595         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4596         E1000_WRITE_FLUSH(&adapter->hw);
4597         return;
4598 }
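
/*
 * A hedged sketch (under #if 0, not compiled): in MSI-X mode a queue
 * interrupt handler typically re-arms only its own cause bit once it is
 * done, which is why igb_enable_intr() programs EIAC/EIAM/EIMS with the
 * full eims_mask up front. The que->eims field is assumed here to hold
 * the per-queue bit.
 */
#if 0
        E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
#endif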
4599
4600 /*
4601  * Bit of a misnomer: what this really means is
4602  * to enable OS management of the system, i.e.
4603  * to disable the special hardware management features.
4604  */
4605 static void
4606 igb_init_manageability(struct adapter *adapter)
4607 {
4608         if (adapter->has_manage) {
4609                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4610                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4611
4612                 /* disable hardware interception of ARP */
4613                 manc &= ~(E1000_MANC_ARP_EN);
4614
4615                 /* enable receiving management packets to the host */
4616                 manc |= E1000_MANC_EN_MNG2HOST;
4617                 manc2h |= 1 << 5;  /* Mng Port 623 */
4618                 manc2h |= 1 << 6;  /* Mng Port 664 */
4619                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4620                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4621         }
4622 }
4623
4624 /*
4625  * Give control back to hardware management
4626  * controller if there is one.
4627  */
4628 static void
4629 igb_release_manageability(struct adapter *adapter)
4630 {
4631         if (adapter->has_manage) {
4632                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4633
4634                 /* re-enable hardware interception of ARP */
4635                 manc |= E1000_MANC_ARP_EN;
4636                 manc &= ~E1000_MANC_EN_MNG2HOST;
4637
4638                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4639         }
4640 }
4641
4642 /*
4643  * igb_get_hw_control sets the CTRL_EXT:DRV_LOAD bit.
4644  * For ASF and Pass Through versions of f/w this means that
4645  * the driver is loaded.
4646  *
4647  */
4648 static void
4649 igb_get_hw_control(struct adapter *adapter)
4650 {
4651         u32 ctrl_ext;
4652
4653         /* Let firmware know the driver has taken over */
4654         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4655         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4656             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4657 }
4658
4659 /*
4660  * igb_release_hw_control resets the CTRL_EXT:DRV_LOAD bit.
4661  * For ASF and Pass Through versions of f/w this means that the
4662  * driver is no longer loaded.
4663  *
4664  */
4665 static void
4666 igb_release_hw_control(struct adapter *adapter)
4667 {
4668         u32 ctrl_ext;
4669
4670         /* Let firmware take over control of h/w */
4671         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4672         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4673             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4674 }
4675
4676 static int
4677 igb_is_valid_ether_addr(uint8_t *addr)
4678 {
4679         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4680
4681         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4682                 return (FALSE);
4683         }
4684
4685         return (TRUE);
4686 }
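
/*
 * A hedged illustration (under #if 0, not compiled): addr[0] & 1 is the
 * Ethernet group (multicast/broadcast) bit, so a multicast address such
 * as 01:00:5e:00:00:01 is rejected along with the all-zero address; the
 * unicast address below is made up for the example.
 */
#if 0
        u8 mcast[ETHER_ADDR_LEN] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
        u8 ucast[ETHER_ADDR_LEN] = { 0x00, 0x1b, 0x21, 0x12, 0x34, 0x56 };

        KKASSERT(igb_is_valid_ether_addr(mcast) == FALSE);
        KKASSERT(igb_is_valid_ether_addr(ucast) == TRUE);
#endif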
4687
4688
4689 /*
4690  * Enable PCI Wake On Lan capability
4691  */
4692 void
4693 igb_enable_wakeup(device_t dev)
4694 {
4695         u16     cap, status;
4696         u8      id;
4697
4698         /* First find the capabilities pointer */
4699         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4700         /* Read the PM Capabilities */
4701         id = pci_read_config(dev, cap, 1);
4702         if (id != PCIY_PMG)     /* Something wrong */
4703                 return;
4704         /* OK, we have the power capabilities, so
4705            now get the status register */
4706         cap += PCIR_POWER_STATUS;
4707         status = pci_read_config(dev, cap, 2);
4708         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4709         pci_write_config(dev, cap, status, 2);
4710         return;
4711 }
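
/*
 * Note that igb_enable_wakeup() bails out unless power management is the
 * very first entry in the PCI capability list. A hedged sketch (under
 * #if 0, not compiled) of a full list walk; igb_find_pmcap is a
 * hypothetical helper, not part of the driver.
 */
#if 0
static u16
igb_find_pmcap(device_t dev)
{
        /* Each capability is an ID byte, a next-pointer byte, payload. */
        u16 ptr = pci_read_config(dev, PCIR_CAP_PTR, 1) & ~0x3;

        while (ptr != 0) {
                if (pci_read_config(dev, ptr, 1) == PCIY_PMG)
                        return (ptr);
                ptr = pci_read_config(dev, ptr + 1, 1) & ~0x3;
        }
        return (0);     /* no power management capability found */
}
#endif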
4712
4713
4714 /**********************************************************************
4715  *
4716  *  Update the board statistics counters.
4717  *
4718  **********************************************************************/
4719 static void
4720 igb_update_stats_counters(struct adapter *adapter)
4721 {
4722         struct ifnet   *ifp;
4723
4724         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4725            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4726                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4727                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4728         }
4729         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4730         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4731         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4732         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4733
4734         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4735         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4736         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4737         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4738         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4739         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4740         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4741         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4742         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4743         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4744         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4745         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4746         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4747         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4748         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4749         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4750         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4751         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4752         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4753         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4754
4755         /* For the 64-bit byte counters the low dword must be read first. */
4756         /* Both registers clear on the read of the high dword */
4757
4758         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4759         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4760
4761         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4762         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4763         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4764         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4765         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4766
4767         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4768         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4769
4770         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4771         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4772         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4773         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4774         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4775         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4776         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4777         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4778         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4779         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4780
4781         adapter->stats.algnerrc += 
4782                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4783         adapter->stats.rxerrc += 
4784                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4785         adapter->stats.tncrs += 
4786                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4787         adapter->stats.cexterr += 
4788                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4789         adapter->stats.tsctc += 
4790                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4791         adapter->stats.tsctfc += 
4792                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4793         ifp = adapter->ifp;
4794
4795         ifp->if_collisions = adapter->stats.colc;
4796
4797         /* Rx Errors */
4798         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4799             adapter->stats.crcerrs + adapter->stats.algnerrc +
4800             adapter->stats.ruc + adapter->stats.roc +
4801             adapter->stats.mpc + adapter->stats.cexterr;
4802
4803         /* Tx Errors */
4804         ifp->if_oerrors = adapter->stats.ecol +
4805             adapter->stats.latecol + adapter->watchdog_events;
4806 }
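
/*
 * The 64-bit counter comment above notwithstanding, the code only
 * accumulates the high dwords of GORC/GOTC/TOR/TOT. A hedged sketch
 * (under #if 0, not compiled) of a full 64-bit read, low dword first so
 * the pair latches and clears coherently:
 */
#if 0
        u64 gorc;

        gorc  = E1000_READ_REG(&adapter->hw, E1000_GORCL);
        gorc |= (u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32;
        adapter->stats.gorc += gorc;
#endif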
4807
4808
4809 /**********************************************************************
4810  *
4811  *  This routine is called only when igb_display_debug_stats is enabled.
4812  *  This routine provides a way to take a look at important statistics
4813  *  maintained by the driver and hardware.
4814  *
4815  **********************************************************************/
4816 static void
4817 igb_print_debug_info(struct adapter *adapter)
4818 {
4819         device_t dev = adapter->dev;
4820         struct igb_queue *que = adapter->queues;
4821         struct rx_ring *rxr = adapter->rx_rings;
4822         struct tx_ring *txr = adapter->tx_rings;
4823         uint8_t *hw_addr = adapter->hw.hw_addr;
4824
4825         device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4826         device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4827             E1000_READ_REG(&adapter->hw, E1000_CTRL),
4828             E1000_READ_REG(&adapter->hw, E1000_RCTL));
4829
4830 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4831         device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4832             E1000_READ_REG(&adapter->hw, E1000_IMS),
4833             E1000_READ_REG(&adapter->hw, E1000_EIMS));
4834 #endif
4835
4836         device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4837             ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4838             (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4839         device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4840             adapter->hw.fc.high_water,
4841             adapter->hw.fc.low_water);
4842
4843         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4844                 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
4845                     E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4846                     E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4847                 device_printf(dev, "rdh = %d, rdt = %d\n",
4848                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4849                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4850                 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4851                     txr->me, (long long)txr->no_desc_avail);
4852                 device_printf(dev, "TX(%d) Packets sent = %lld\n",
4853                     txr->me, (long long)txr->tx_packets);
4854                 device_printf(dev, "RX(%d) Packets received = %lld  ",
4855                     rxr->me, (long long)rxr->rx_packets);
4856         }
4857
        rxr = adapter->rx_rings; /* the loop above advanced rxr; rewind it */
4858         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4859 #ifdef NET_LRO
4860                 struct lro_ctrl *lro = &rxr->lro;
4861 #endif
4862                 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4863                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4864                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4865                 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4866                     (long long)rxr->rx_packets);
4867                 device_printf(dev, " Split Packets = %lld ",
4868                     (long long)rxr->rx_split_packets);
4869                 device_printf(dev, " Byte count = %lld\n",
4870                     (long long)rxr->rx_bytes);
4871 #ifdef NET_LRO
4872                 device_printf(dev, "RX(%d) LRO Queued = %d  ",
4873                     i, lro->lro_queued);
4874                 device_printf(dev, "LRO Flushed = %d\n", lro->lro_flushed);
4875 #endif
4876         }
4877
4878         for (int i = 0; i < adapter->num_queues; i++, que++)
4879                 device_printf(dev, "QUE(%d) IRQs = %llx\n",
4880                     i, (long long)que->irqs);
4881
4882         device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4883         device_printf(dev, "Mbuf defrag failed = %ld\n",
4884             adapter->mbuf_defrag_failed);
4885         device_printf(dev, "Std mbuf header failed = %ld\n",
4886             adapter->mbuf_header_failed);
4887         device_printf(dev, "Std mbuf packet failed = %ld\n",
4888             adapter->mbuf_packet_failed);
4889         device_printf(dev, "Driver dropped packets = %ld\n",
4890             adapter->dropped_pkts);
4891         device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4892                 adapter->no_tx_dma_setup);
4893 }
4894
4895 static void
4896 igb_print_hw_stats(struct adapter *adapter)
4897 {
4898         device_t dev = adapter->dev;
4899
4900         device_printf(dev, "Excessive collisions = %lld\n",
4901             (long long)adapter->stats.ecol);
4902 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4903         device_printf(dev, "Symbol errors = %lld\n",
4904             (long long)adapter->stats.symerrs);
4905 #endif
4906         device_printf(dev, "Sequence errors = %lld\n",
4907             (long long)adapter->stats.sec);
4908         device_printf(dev, "Defer count = %lld\n",
4909             (long long)adapter->stats.dc);
4910         device_printf(dev, "Missed Packets = %lld\n",
4911             (long long)adapter->stats.mpc);
4912         device_printf(dev, "Receive No Buffers = %lld\n",
4913             (long long)adapter->stats.rnbc);
4914         /* RLEC is inaccurate on some hardware, so calculate our own. */
4915         device_printf(dev, "Receive Length Errors = %lld\n",
4916             ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4917         device_printf(dev, "Receive errors = %lld\n",
4918             (long long)adapter->stats.rxerrc);
4919         device_printf(dev, "Crc errors = %lld\n",
4920             (long long)adapter->stats.crcerrs);
4921         device_printf(dev, "Alignment errors = %lld\n",
4922             (long long)adapter->stats.algnerrc);
4923         /* On 82575 these are collision counts */
4924         device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4925             (long long)adapter->stats.cexterr);
4926         device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4927         device_printf(dev, "watchdog timeouts = %ld\n",
4928             adapter->watchdog_events);
4929         device_printf(dev, "XON Rcvd = %lld\n",
4930             (long long)adapter->stats.xonrxc);
4931         device_printf(dev, "XON Xmtd = %lld\n",
4932             (long long)adapter->stats.xontxc);
4933         device_printf(dev, "XOFF Rcvd = %lld\n",
4934             (long long)adapter->stats.xoffrxc);
4935         device_printf(dev, "XOFF Xmtd = %lld\n",
4936             (long long)adapter->stats.xofftxc);
4937         device_printf(dev, "Good Packets Rcvd = %lld\n",
4938             (long long)adapter->stats.gprc);
4939         device_printf(dev, "Good Packets Xmtd = %lld\n",
4940             (long long)adapter->stats.gptc);
4941         device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4942             (long long)adapter->stats.tsctc);
4943         device_printf(dev, "TSO Contexts Failed = %lld\n",
4944             (long long)adapter->stats.tsctfc);
4945 }
4946
4947 /**********************************************************************
4948  *
4949  *  This routine provides a way to dump out the adapter eeprom,
4950  *  often a useful debug/service tool. This only dumps the first
4951  *  32 words; the stuff that matters is within that extent.
4952  *
4953  **********************************************************************/
4954 static void
4955 igb_print_nvm_info(struct adapter *adapter)
4956 {
4957         u16     eeprom_data;
4958         int     i, j, row = 0;
4959
4960         /* It's a bit crude, but it gets the job done */
4961         kprintf("\nInterface EEPROM Dump:\n");
4962         kprintf("Offset\n0x0000  ");
4963         for (i = 0, j = 0; i < 32; i++, j++) {
4964                 if (j == 8) { /* Make the offset block */
4965                         j = 0; ++row;
4966                         kprintf("\n0x00%x0  ",row);
4967                 }
4968                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4969                 kprintf("%04x ", eeprom_data);
4970         }
4971         kprintf("\n");
4972 }
4973
4974 static int
4975 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4976 {
4977         struct adapter *adapter;
4978         int error;
4979         int result;
4980
4981         result = -1;
4982         error = sysctl_handle_int(oidp, &result, 0, req);
4983
4984         if (error || !req->newptr)
4985                 return (error);
4986
4987         if (result == 1) {
4988                 adapter = (struct adapter *)arg1;
4989                 igb_print_debug_info(adapter);
4990         }
4991         /*
4992          * This value will cause a hex dump of the
4993          * first 32 16-bit words of the EEPROM to
4994          * the screen.
4995          */
4996         if (result == 2) {
4997                 adapter = (struct adapter *)arg1;
4998                 igb_print_nvm_info(adapter);
4999         }
5000
5001         return (error);
5002 }
5003
5004
5005 static int
5006 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
5007 {
5008         struct adapter *adapter;
5009         int error;
5010         int result;
5011
5012         result = -1;
5013         error = sysctl_handle_int(oidp, &result, 0, req);
5014
5015         if (error || !req->newptr)
5016                 return (error);
5017
5018         if (result == 1) {
5019                 adapter = (struct adapter *)arg1;
5020                 igb_print_hw_stats(adapter);
5021         }
5022
5023         return (error);
5024 }
5025
5026 static void
5027 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5028         const char *description, int *limit, int value)
5029 {
5030         *limit = value;
5031         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
5032             SYSCTL_CHILDREN(adapter->sysctl_tree),
5033             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5034 }