igb: Always prepare for VLAN when setting up the reception unit
[dragonfly.git] sys/dev/netif/e1000/if_igb.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2010, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33
34
35 #include "opt_polling.h"
36 #include "opt_inet.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #if __FreeBSD_version >= 800000
41 #include <sys/buf_ring.h>
42 #endif
43 #include <sys/bus.h>
44 #include <sys/endian.h>
45 #include <sys/lock.h>
46 #include <sys/kernel.h>
47 #include <sys/kthread.h>
48 #include <sys/malloc.h>
49 #include <sys/mbuf.h>
50 #include <sys/module.h>
51 #include <sys/rman.h>
52 #include <sys/socket.h>
53 #include <sys/sockio.h>
54 #include <sys/sysctl.h>
55 #include <sys/taskqueue.h>
56 #include <sys/eventhandler.h>
57
58 #ifdef IGB_IEEE1588
59 #include <sys/ieee1588.h>
60 #endif
61
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 #include <net/ifq_var.h>
69
70 #include <net/if_types.h>
71 #include <net/vlan/if_vlan_var.h>
72 #include <net/vlan/if_vlan_ether.h>
73
74 #include <netinet/in_systm.h>
75 #include <netinet/in.h>
76 #include <netinet/if_ether.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip6.h>
79 #include <netinet/tcp.h>
80 #ifdef NET_LRO
81 #include <netinet/tcp_lro.h>
82 #endif
83 #include <netinet/udp.h>
84
85 #include <sys/in_cksum.h>
86 #include <bus/pci/pcivar.h>
87 #include <bus/pci/pcireg.h>
88
89 #include "e1000_api.h"
90 #include "e1000_82575.h"
91 #include "if_igb.h"
92 #include "ifcap_defines.h" // XXX
93
94 /*********************************************************************
95  *  Set this to one to display debug statistics
96  *********************************************************************/
97 int     igb_display_debug_stats = 0;
98
99 /*********************************************************************
100  *  Driver version:
101  *********************************************************************/
102 char igb_driver_version[] = "version - 1.9.1";
103
104
105 /*********************************************************************
106  *  PCI Device ID Table
107  *
108  *  Used by probe to select devices to load on
109  *  Last field stores an index into igb_strings
110  *  Last entry must be all 0s
111  *
112  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113  *********************************************************************/
114
115 static igb_vendor_info_t igb_vendor_info_array[] =
116 {
117         { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
118         { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
123         { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
125         { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
126         { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
127         { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
132                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
134         { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
136         { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
137         { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
138                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
139         /* required last entry */
140         { 0, 0, 0, 0, 0}
141 };
142
143 /*********************************************************************
144  *  Table of branding strings for all supported NICs.
145  *********************************************************************/
146
147 static char *igb_strings[] = {
148         "Intel(R) PRO/1000 Network Connection"
149 };
150
151 /*********************************************************************
152  *  Function prototypes
153  *********************************************************************/
154 static int      igb_probe(device_t);
155 static int      igb_attach(device_t);
156 static int      igb_detach(device_t);
157 static int      igb_shutdown(device_t);
158 static int      igb_suspend(device_t);
159 static int      igb_resume(device_t);
160 static void     igb_start(struct ifnet *);
161 static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
162 #if __FreeBSD_version >= 800000
163 static int      igb_mq_start(struct ifnet *, struct mbuf *);
164 static int      igb_mq_start_locked(struct ifnet *,
165                     struct tx_ring *, struct mbuf *);
166 static void     igb_qflush(struct ifnet *);
167 #endif
168 static int      igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
169 static void     igb_init(void *);
170 static void     igb_init_locked(struct adapter *);
171 static void     igb_stop(void *);
172 static void     igb_media_status(struct ifnet *, struct ifmediareq *);
173 static int      igb_media_change(struct ifnet *);
174 static void     igb_identify_hardware(struct adapter *);
175 static int      igb_allocate_pci_resources(struct adapter *);
176 static int      igb_allocate_msix(struct adapter *);
177 static int      igb_allocate_legacy(struct adapter *);
178 static int      igb_setup_msix(struct adapter *);
179 static void     igb_free_pci_resources(struct adapter *);
180 static void     igb_local_timer(void *);
181 static void     igb_reset(struct adapter *);
182 static void     igb_setup_interface(device_t, struct adapter *);
183 static int      igb_allocate_queues(struct adapter *);
184 static void     igb_configure_queues(struct adapter *);
185
186 static int      igb_allocate_transmit_buffers(struct tx_ring *);
187 static void     igb_setup_transmit_structures(struct adapter *);
188 static void     igb_setup_transmit_ring(struct tx_ring *);
189 static void     igb_initialize_transmit_units(struct adapter *);
190 static void     igb_free_transmit_structures(struct adapter *);
191 static void     igb_free_transmit_buffers(struct tx_ring *);
192
193 static int      igb_allocate_receive_buffers(struct rx_ring *);
194 static int      igb_setup_receive_structures(struct adapter *);
195 static int      igb_setup_receive_ring(struct rx_ring *);
196 static void     igb_initialize_receive_units(struct adapter *);
197 static void     igb_free_receive_structures(struct adapter *);
198 static void     igb_free_receive_buffers(struct rx_ring *);
199 static void     igb_free_receive_ring(struct rx_ring *);
200
201 static void     igb_enable_intr(struct adapter *);
202 static void     igb_disable_intr(struct adapter *);
203 static void     igb_update_stats_counters(struct adapter *);
204 static bool     igb_txeof(struct tx_ring *);
205
206 static __inline void igb_rx_discard(struct rx_ring *,
207                     union e1000_adv_rx_desc *, int);
208 static __inline void igb_rx_input(struct rx_ring *,
209                     struct ifnet *, struct mbuf *, u32);
210
211 static bool     igb_rxeof(struct rx_ring *, int);
212 static void     igb_rx_checksum(u32, struct mbuf *, u32);
213 static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
214 #ifdef NET_TSO
215 static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
216 #endif
217 static void     igb_set_promisc(struct adapter *);
218 static void     igb_disable_promisc(struct adapter *);
219 static void     igb_set_multi(struct adapter *);
220 static void     igb_print_hw_stats(struct adapter *);
221 static void     igb_update_link_status(struct adapter *);
222 static int      igb_get_buf(struct rx_ring *, int, u8);
223
224 static void     igb_register_vlan(void *, struct ifnet *, u16);
225 static void     igb_unregister_vlan(void *, struct ifnet *, u16);
226 static void     igb_setup_vlan_hw_support(struct adapter *);
227
228 static int      igb_xmit(struct tx_ring *, struct mbuf **);
229 static int      igb_dma_malloc(struct adapter *, bus_size_t,
230                     struct igb_dma_alloc *, int);
231 static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
232 static void     igb_print_debug_info(struct adapter *);
233 static void     igb_print_nvm_info(struct adapter *);
234 static int      igb_is_valid_ether_addr(u8 *);
235 static int      igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
236 static int      igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
237 /* Management and WOL Support */
238 static void     igb_init_manageability(struct adapter *);
239 static void     igb_release_manageability(struct adapter *);
240 static void     igb_get_hw_control(struct adapter *);
241 static void     igb_release_hw_control(struct adapter *);
242 static void     igb_enable_wakeup(device_t);
243
244 static void     igb_irq_fast(void *);
245 static void     igb_add_rx_process_limit(struct adapter *, const char *,
246                     const char *, int *, int);
247 static void     igb_handle_rxtx(void *context, int pending);
248 static void     igb_handle_que(void *context, int pending);
249 static void     igb_handle_link(void *context, int pending);
250
251 /* These are MSIX only irq handlers */
252 static void     igb_msix_que(void *);
253 static void     igb_msix_link(void *);
254
255 #ifdef DEVICE_POLLING
256 static poll_handler_t igb_poll;
257 #endif /* POLLING */
258
259 /*********************************************************************
260  *  FreeBSD Device Interface Entry Points
261  *********************************************************************/
262
263 static device_method_t igb_methods[] = {
264         /* Device interface */
265         DEVMETHOD(device_probe, igb_probe),
266         DEVMETHOD(device_attach, igb_attach),
267         DEVMETHOD(device_detach, igb_detach),
268         DEVMETHOD(device_shutdown, igb_shutdown),
269         DEVMETHOD(device_suspend, igb_suspend),
270         DEVMETHOD(device_resume, igb_resume),
271         {0, 0}
272 };
273
274 static driver_t igb_driver = {
275         "igb", igb_methods, sizeof(struct adapter),
276 };
277
278 static devclass_t igb_devclass;
279 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, NULL, NULL);
280 MODULE_DEPEND(igb, pci, 1, 1, 1);
281 MODULE_DEPEND(igb, ether, 1, 1, 1);
282
283 /*********************************************************************
284  *  Tunable default values.
285  *********************************************************************/
286
287 /* Descriptor defaults */
288 static int igb_rxd = IGB_DEFAULT_RXD;
289 static int igb_txd = IGB_DEFAULT_TXD;
290 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
291 TUNABLE_INT("hw.igb.txd", &igb_txd);
292
293 /*
294 ** AIM: Adaptive Interrupt Moderation
295 ** which means that the interrupt rate
296 ** is varied over time based on the
297 ** traffic for that interrupt vector
298 */
299 static int igb_enable_aim = TRUE;
300 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
301
302 /*
303  * MSIX usually gives the best performance, but it is left
304  * disabled by default here; this tunable allows it to be enabled.
305  */         
306 static int igb_enable_msix = 0;
307 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
308
309 /*
310  * Header split has seemed to be beneficial in
311  * many of the circumstances tested; however, there
312  * have been some stability issues, so the default
313  * is off.
314  */
315 static bool igb_header_split = FALSE;
316 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
317
318 /*
319 ** This will autoconfigure based on
320 ** the number of CPUs if left at 0.
321 */
322 static int igb_num_queues = 0;
323 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
324
325 /* How many packets rxeof tries to clean at a time */
326 static int igb_rx_process_limit = 100;
327 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
328
329 /* Flow control setting - default to FULL */
330 static int igb_fc_setting = e1000_fc_full;
331 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
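    /*
     * Example (illustrative): as TUNABLE_INT() knobs, the settings above
     * are read from the loader environment, e.g. set in /boot/loader.conf
     * before the driver loads:
     *
     *     hw.igb.rxd=2048
     *     hw.igb.enable_msix=1
     *
     * The descriptor counts are re-validated in igb_attach() and fall
     * back to the defaults if they are out of range or misaligned.
     */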
332
333 /*
334 ** Shadow VFTA table; this is needed because
335 ** the real filter table gets cleared during
336 ** a soft reset and the driver needs to be able
337 ** to repopulate it.
338 */
339 static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
340
341
342 /*********************************************************************
343  *  Device identification routine
344  *
345  *  igb_probe determines if the driver should be loaded on an
346  *  adapter based on the PCI vendor/device ID of the adapter.
347  *
348  *  return BUS_PROBE_DEFAULT on success, positive on failure
349  *********************************************************************/
350
351 static int
352 igb_probe(device_t dev)
353 {
354         char            adapter_name[60];
355         uint16_t        pci_vendor_id = 0;
356         uint16_t        pci_device_id = 0;
357         uint16_t        pci_subvendor_id = 0;
358         uint16_t        pci_subdevice_id = 0;
359         igb_vendor_info_t *ent;
360
361         INIT_DEBUGOUT("igb_probe: begin");
362
363         pci_vendor_id = pci_get_vendor(dev);
364         if (pci_vendor_id != IGB_VENDOR_ID)
365                 return (ENXIO);
366
367         pci_device_id = pci_get_device(dev);
368         pci_subvendor_id = pci_get_subvendor(dev);
369         pci_subdevice_id = pci_get_subdevice(dev);
370
371         ent = igb_vendor_info_array;
372         while (ent->vendor_id != 0) {
373                 if ((pci_vendor_id == ent->vendor_id) &&
374                     (pci_device_id == ent->device_id) &&
375
376                     ((pci_subvendor_id == ent->subvendor_id) ||
377                     (ent->subvendor_id == PCI_ANY_ID)) &&
378
379                     ((pci_subdevice_id == ent->subdevice_id) ||
380                     (ent->subdevice_id == PCI_ANY_ID))) {
381                         ksprintf(adapter_name, "%s %s",
382                                 igb_strings[ent->index],
383                                 igb_driver_version);
384                         device_set_desc_copy(dev, adapter_name);
385                         return (BUS_PROBE_DEFAULT);
386                 }
387                 ent++;
388         }
389
390         return (ENXIO);
391 }
392
393 /*********************************************************************
394  *  Device initialization routine
395  *
396  *  The attach entry point is called when the driver is being loaded.
397  *  This routine identifies the type of hardware, allocates all resources
398  *  and initializes the hardware.
399  *
400  *  return 0 on success, positive on failure
401  *********************************************************************/
402
403 static int
404 igb_attach(device_t dev)
405 {
406         struct adapter  *adapter;
407         int             error = 0;
408         u16             eeprom_data;
409
410         INIT_DEBUGOUT("igb_attach: begin");
411
412         adapter = device_get_softc(dev);
413         adapter->dev = adapter->osdep.dev = dev;
414         IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
415
416         /* SYSCTL stuff */
417         sysctl_ctx_init(&adapter->sysctl_ctx);
418         adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
419                                         SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
420                                         device_get_nameunit(adapter->dev),
421                                         CTLFLAG_RD, 0, "");
422         if (adapter->sysctl_tree == NULL) {
423                 device_printf(adapter->dev, "can't add sysctl node\n");
424                 error = ENOMEM;
425                 goto err_sysctl;
426         }
427
428         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
429             SYSCTL_CHILDREN(adapter->sysctl_tree),
430             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
431             igb_sysctl_debug_info, "I", "Debug Information");
432
433         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
434             SYSCTL_CHILDREN(adapter->sysctl_tree),
435             OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
436             igb_sysctl_stats, "I", "Statistics");
437
438         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
439             SYSCTL_CHILDREN(adapter->sysctl_tree),
440             OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
441             &igb_fc_setting, 0, "Flow Control");
442
443         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
444             SYSCTL_CHILDREN(adapter->sysctl_tree),
445             OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
446             &igb_enable_aim, 1, "Interrupt Moderation");
447
448         callout_init_mp(&adapter->timer);
449
450         /* Determine hardware and mac info */
451         igb_identify_hardware(adapter);
452
453         /* Setup PCI resources */
454         if (igb_allocate_pci_resources(adapter)) {
455                 device_printf(dev, "Allocation of PCI resources failed\n");
456                 error = ENXIO;
457                 goto err_pci;
458         }
459
460         /* Do Shared Code initialization */
461         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
462                 device_printf(dev, "Setup of Shared code failed\n");
463                 error = ENXIO;
464                 goto err_pci;
465         }
466
467         e1000_get_bus_info(&adapter->hw);
468
469         /* Sysctls for limiting the amount of work done in the taskqueue */
470         igb_add_rx_process_limit(adapter, "rx_processing_limit",
471             "max number of rx packets to process", &adapter->rx_process_limit,
472             igb_rx_process_limit);
473
474         /*
475          * Validate the number of transmit and receive descriptors.  It
476          * must not exceed the hardware maximum and must be a multiple
477          * of IGB_DBA_ALIGN.
478          */
479         if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
480             (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
481                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
482                     IGB_DEFAULT_TXD, igb_txd);
483                 adapter->num_tx_desc = IGB_DEFAULT_TXD;
484         } else
485                 adapter->num_tx_desc = igb_txd;
486         if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
487             (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
488                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
489                     IGB_DEFAULT_RXD, igb_rxd);
490                 adapter->num_rx_desc = IGB_DEFAULT_RXD;
491         } else
492                 adapter->num_rx_desc = igb_rxd;
493
494         adapter->hw.mac.autoneg = DO_AUTO_NEG;
495         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
496         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
497
498         /* Copper options */
499         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
500                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
501                 adapter->hw.phy.disable_polarity_correction = FALSE;
502                 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
503         }
504
505         /*
506          * Set the frame limits assuming
507          * standard ethernet sized frames.
508          */
509         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
510         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
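            /*
             * With the standard sizes (1500-byte MTU, 14-byte Ethernet
             * header, 4-byte FCS) this works out to a 1518-byte maximum
             * and a 64-byte minimum frame.
             */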
511
512         /*
513         ** Allocate and Setup Queues
514         */
515         if (igb_allocate_queues(adapter)) {
516                 error = ENOMEM;
517                 goto err_pci;
518         }
519
520         /*
521         ** Start from a known state; this is
522         ** important for reading the NVM and
523         ** MAC address from it.
524         */
525         e1000_reset_hw(&adapter->hw);
526
527         /* Make sure we have a good EEPROM before we read from it */
528         if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
529                 /*
530                 ** Some PCI-E parts fail the first check due to
531                 ** the link being in a sleep state; call it again, and
532                 ** if it fails a second time it's a real issue.
533                 */
534                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
535                         device_printf(dev,
536                             "The EEPROM Checksum Is Not Valid\n");
537                         error = EIO;
538                         goto err_late;
539                 }
540         }
541
542         /*
543         ** Copy the permanent MAC address out of the EEPROM
544         */
545         if (e1000_read_mac_addr(&adapter->hw) < 0) {
546                 device_printf(dev, "EEPROM read error while reading MAC"
547                     " address\n");
548                 error = EIO;
549                 goto err_late;
550         }
551         /* Check its sanity */
552         if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
553                 device_printf(dev, "Invalid MAC address\n");
554                 error = EIO;
555                 goto err_late;
556         }
557
558         /* 
559         ** Configure Interrupts
560         */
561         if ((adapter->msix > 1) && (igb_enable_msix))
562                 error = igb_allocate_msix(adapter);
563         else /* MSI or Legacy */
564                 error = igb_allocate_legacy(adapter);
565         if (error)
566                 goto err_late;
567
568         /* Setup OS specific network interface */
569         igb_setup_interface(dev, adapter);
570
571         /* Now get a good starting state */
572         igb_reset(adapter);
573
574         /* Initialize statistics */
575         igb_update_stats_counters(adapter);
576
577         adapter->hw.mac.get_link_status = 1;
578         igb_update_link_status(adapter);
579
580         /* Indicate SOL/IDER usage */
581         if (e1000_check_reset_block(&adapter->hw))
582                 device_printf(dev,
583                     "PHY reset is blocked due to SOL/IDER session.\n");
584
585         /* Determine if we have to control management hardware */
586         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
587
588         /*
589          * Setup Wake-on-Lan
590          */
591         /* APME bit in EEPROM is mapped to WUC.APME */
592         eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
593         if (eeprom_data)
594                 adapter->wol = E1000_WUFC_MAG;
595
596         /* Register for VLAN events */
597         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
598              igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
599         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
600              igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
601
602         /* Tell the stack that the interface is not active */
603         adapter->ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
604
605         INIT_DEBUGOUT("igb_attach: end");
606
607         return (0);
608
609 err_late:
610         igb_free_transmit_structures(adapter);
611         igb_free_receive_structures(adapter);
612         igb_release_hw_control(adapter);
613 err_pci:
614         igb_free_pci_resources(adapter);
615 err_sysctl:
616         sysctl_ctx_free(&adapter->sysctl_ctx);
617         IGB_CORE_LOCK_DESTROY(adapter);
618
619         return (error);
620 }
621
622 /*********************************************************************
623  *  Device removal routine
624  *
625  *  The detach entry point is called when the driver is being removed.
626  *  This routine stops the adapter and deallocates all the resources
627  *  that were allocated for driver operation.
628  *
629  *  return 0 on success, positive on failure
630  *********************************************************************/
631
632 static int
633 igb_detach(device_t dev)
634 {
635         struct adapter  *adapter = device_get_softc(dev);
636
637         INIT_DEBUGOUT("igb_detach: begin");
638
639         IGB_CORE_LOCK(adapter);
640         adapter->in_detach = 1;
641         igb_stop(adapter);
642         IGB_CORE_UNLOCK(adapter);
643
644         e1000_phy_hw_reset(&adapter->hw);
645
646         /* Give control back to firmware */
647         igb_release_manageability(adapter);
648         igb_release_hw_control(adapter);
649
650         if (adapter->wol) {
651                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
652                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
653                 igb_enable_wakeup(dev);
654         }
655
656         /* Unregister VLAN events */
657         if (adapter->vlan_attach != NULL)
658                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
659         if (adapter->vlan_detach != NULL)
660                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
661
662         ether_ifdetach(adapter->ifp);
663
664         //callout_drain(&adapter->timer);
665         callout_stop(&adapter->timer);
666
667         igb_free_pci_resources(adapter);
668         bus_generic_detach(dev);
669
670         igb_free_transmit_structures(adapter);
671         igb_free_receive_structures(adapter);
672
673         sysctl_ctx_free(&adapter->sysctl_ctx);
674         IGB_CORE_LOCK_DESTROY(adapter);
675
676         return (0);
677 }
678
679 /*********************************************************************
680  *
681  *  Shutdown entry point
682  *
683  **********************************************************************/
684
685 static int
686 igb_shutdown(device_t dev)
687 {
688         return igb_suspend(dev);
689 }
690
691 /*
692  * Suspend/resume device methods.
693  */
694 static int
695 igb_suspend(device_t dev)
696 {
697         struct adapter *adapter = device_get_softc(dev);
698
699         IGB_CORE_LOCK(adapter);
700
701         igb_stop(adapter);
702
703         igb_release_manageability(adapter);
704         igb_release_hw_control(adapter);
705
706         if (adapter->wol) {
707                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
708                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
709                 igb_enable_wakeup(dev);
710         }
711
712         IGB_CORE_UNLOCK(adapter);
713
714         return bus_generic_suspend(dev);
715 }
716
717 static int
718 igb_resume(device_t dev)
719 {
720         struct adapter *adapter = device_get_softc(dev);
721         struct ifnet *ifp = adapter->ifp;
722
723         IGB_CORE_LOCK(adapter);
724         igb_init_locked(adapter);
725         igb_init_manageability(adapter);
726
727         if ((ifp->if_flags & IFF_UP) &&
728             (ifp->if_flags & IFF_RUNNING))
729                 igb_start(ifp);
730
731         IGB_CORE_UNLOCK(adapter);
732
733         return bus_generic_resume(dev);
734 }
735
736
737 /*********************************************************************
738  *  Transmit entry point
739  *
740  *  igb_start is called by the stack to initiate a transmit.
741  *  The driver will remain in this routine as long as there are
742  *  packets to transmit and transmit resources are available.
743  *  In case resources are not available, the stack is notified
744  *  and the packet is requeued.
745  **********************************************************************/
746
747 static void
748 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
749 {
750         struct adapter  *adapter = ifp->if_softc;
751         struct mbuf     *m_head;
752
753         IGB_TX_LOCK_ASSERT(txr);
754
755         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
756                 return;
757
758         /*
759          * Must purge on abort from this point on or the netif will call
760          * us endlessly.  Either that or set IFF_OACTIVE.
761          */
762         if (!adapter->link_active) {
763                 ifq_purge(&ifp->if_snd);
764                 return;
765         }
766
767         while (!ifq_is_empty(&ifp->if_snd)) {
768
769                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
770                 if (m_head == NULL)
771                         break;
772                 /*
773                  *  Encapsulation can modify our pointer, and/or make it
774                  *  NULL on failure.  In that event, we can't requeue.
775                  */
776                 if (igb_xmit(txr, &m_head)) {
777                         if (m_head == NULL)
778                                 break;
779                         ifp->if_flags |= IFF_OACTIVE;
780                         ifq_prepend(&ifp->if_snd, m_head);
781                         break;
782                 }
783
784                 /* Send a copy of the frame to the BPF listener */
785                 ETHER_BPF_MTAP(ifp, m_head);
786
787                 /* Set watchdog on */
788                 txr->watchdog_check = TRUE;
789         }
790 }
791  
792 /*
793  * Legacy TX driver routine, called from the
794  * stack, always uses tx[0], and spins for it.
795  * Should not be used with multiqueue tx
796  */
797 static void
798 igb_start(struct ifnet *ifp)
799 {
800         struct adapter  *adapter = ifp->if_softc;
801         struct tx_ring  *txr = adapter->tx_rings;
802
803         if (ifp->if_flags & IFF_RUNNING) {
804                 IGB_TX_LOCK(txr);
805                 igb_start_locked(txr, ifp);
806                 IGB_TX_UNLOCK(txr);
807         }
808         return;
809 }
810
811 #if __FreeBSD_version >= 800000
812 /*
813 ** Multiqueue Transmit driver
814 **
815 */
816 static int
817 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
818 {
819         struct adapter  *adapter = ifp->if_softc;
820         struct tx_ring  *txr;
821         int             i = 0, err = 0;
822
823         /* Which queue to use */
824         if ((m->m_flags & M_FLOWID) != 0)
825                 i = m->m_pkthdr.flowid % adapter->num_queues;
826         txr = &adapter->tx_rings[i];
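            /* Frames without an M_FLOWID hash all map to ring 0 (i stays 0). */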
827
828         if (IGB_TX_TRYLOCK(txr)) {
829                 err = igb_mq_start_locked(ifp, txr, m);
830                 IGB_TX_UNLOCK(txr);
831         } else
832                 err = drbr_enqueue(ifp, txr->br, m);
833
834         return (err);
835 }
836
837 static int
838 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
839 {
840         struct adapter  *adapter = txr->adapter;
841         struct mbuf     *next;
842         int             err = 0, enq;
843
844         IGB_TX_LOCK_ASSERT(txr);
845
846         if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) !=
847             IFF_RUNNING || adapter->link_active == 0) {
848                 if (m != NULL)
849                         err = drbr_enqueue(ifp, txr->br, m);
850                 return (err);
851         }
852
853         enq = 0;
854         if (m == NULL) {
855                 next = drbr_dequeue(ifp, txr->br);
856         } else if (drbr_needs_enqueue(ifp, txr->br)) {
857                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
858                         return (err);
859                 next = drbr_dequeue(ifp, txr->br);
860         } else
861                 next = m;
862         /* Process the queue */
863         while (next != NULL) {
864                 if ((err = igb_xmit(txr, &next)) != 0) {
865                         if (next != NULL)
866                                 err = drbr_enqueue(ifp, txr->br, next);
867                         break;
868                 }
869                 enq++;
870                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
871                 ETHER_BPF_MTAP(ifp, next);
872                 if ((ifp->if_flags & IFF_RUNNING) == 0)
873                         break;
874                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
875                         ifp->if_flags |= IFF_OACTIVE;
876                         break;
877                 }
878                 next = drbr_dequeue(ifp, txr->br);
879         }
880         if (enq > 0) {
881                 /* Set the watchdog */
882                 txr->watchdog_check = TRUE;
883         }
884         return (err);
885 }
886
887 /*
888 ** Flush all ring buffers
889 */
890 static void
891 igb_qflush(struct ifnet *ifp)
892 {
893         struct adapter  *adapter = ifp->if_softc;
894         struct tx_ring  *txr = adapter->tx_rings;
895         struct mbuf     *m;
896
897         for (int i = 0; i < adapter->num_queues; i++, txr++) {
898                 IGB_TX_LOCK(txr);
899                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
900                         m_freem(m);
901                 IGB_TX_UNLOCK(txr);
902         }
903         if_qflush(ifp);
904 }
905 #endif /* __FreeBSD_version >= 800000 */
906
907 /*********************************************************************
908  *  Ioctl entry point
909  *
910  *  igb_ioctl is called when the user wants to configure the
911  *  interface.
912  *
913  *  return 0 on success, positive on failure
914  **********************************************************************/
915
916 static int
917 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cred)
918 {
919         struct adapter  *adapter = ifp->if_softc;
920         struct ifreq *ifr = (struct ifreq *)data;
921         int error = 0;
922
923         if (adapter->in_detach)
924                 return (error);
925
926         switch (command) {
927         case SIOCSIFMTU:
928             {
929                 int max_frame_size;
930
931                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
932
933                 IGB_CORE_LOCK(adapter);
934                 max_frame_size = 9234;
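                    /*
                     * 9234 bytes of frame leaves room for an MTU of up to
                     * 9234 - 14 (header) - 4 (CRC) = 9216 bytes.
                     */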
935                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
936                     ETHER_CRC_LEN) {
937                         IGB_CORE_UNLOCK(adapter);
938                         error = EINVAL;
939                         break;
940                 }
941
942                 ifp->if_mtu = ifr->ifr_mtu;
943                 adapter->max_frame_size =
944                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
945                 igb_init_locked(adapter);
946                 IGB_CORE_UNLOCK(adapter);
947                 break;
948             }
949         case SIOCSIFFLAGS:
950                 IOCTL_DEBUGOUT("ioctl rcv'd:\
951                     SIOCSIFFLAGS (Set Interface Flags)");
952                 IGB_CORE_LOCK(adapter);
953                 if (ifp->if_flags & IFF_UP) {
954                         if ((ifp->if_flags & IFF_RUNNING)) {
955                                 if ((ifp->if_flags ^ adapter->if_flags) &
956                                     (IFF_PROMISC | IFF_ALLMULTI)) {
957                                         igb_disable_promisc(adapter);
958                                         igb_set_promisc(adapter);
959                                 }
960                         } else
961                                 igb_init_locked(adapter);
962                 } else
963                         if (ifp->if_flags & IFF_RUNNING)
964                                 igb_stop(adapter); 
965                 adapter->if_flags = ifp->if_flags;
966                 IGB_CORE_UNLOCK(adapter);
967                 break;
968         case SIOCADDMULTI:
969         case SIOCDELMULTI:
970                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
971                 if (ifp->if_flags & IFF_RUNNING) {
972                         IGB_CORE_LOCK(adapter);
973                         igb_disable_intr(adapter);
974                         igb_set_multi(adapter);
975 #ifdef DEVICE_POLLING
976                         if ((ifp->if_flags & IFF_POLLING) == 0)
977 #endif
978                                 igb_enable_intr(adapter);
979                         IGB_CORE_UNLOCK(adapter);
980                 }
981                 break;
982         case SIOCSIFMEDIA:
983                 /* Check SOL/IDER usage */
984                 IGB_CORE_LOCK(adapter);
985                 if (e1000_check_reset_block(&adapter->hw)) {
986                         IGB_CORE_UNLOCK(adapter);
987                         device_printf(adapter->dev, "Media change is"
988                             " blocked due to SOL/IDER session.\n");
989                         break;
990                 }
991                 IGB_CORE_UNLOCK(adapter);
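                    /* FALLTHROUGH: get and set share the ifmedia handler below */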
992         case SIOCGIFMEDIA:
993                 IOCTL_DEBUGOUT("ioctl rcv'd: \
994                     SIOCxIFMEDIA (Get/Set Interface Media)");
995                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
996                 break;
997         case SIOCSIFCAP:
998             {
999                 int mask, reinit;
1000
1001                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1002                 reinit = 0;
1003                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1004 #ifdef DEVICE_POLLING
1005                 if (ifp->if_flags & IFF_POLLING) {
1006                         IGB_CORE_LOCK(adapter);
1007                         igb_disable_intr(adapter);
1008                         IGB_CORE_UNLOCK(adapter);
1009                 }
1010 #endif
1011                 if (mask & IFCAP_HWCSUM) {
1012                         ifp->if_capenable ^= IFCAP_HWCSUM;
1013                         reinit = 1;
1014                 }
1015 #ifdef NET_TSO 
1016                 if (mask & IFCAP_TSO4) {
1017                         ifp->if_capenable ^= IFCAP_TSO4;
1018                         reinit = 1;
1019                 }
1020 #endif
1021                 if (mask & IFCAP_VLAN_HWTAGGING) {
1022                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1023                         reinit = 1;
1024                 }
1025 #ifdef NET_LRO 
1026                 if (mask & IFCAP_LRO) {
1027                         ifp->if_capenable ^= IFCAP_LRO;
1028                         reinit = 1;
1029                 }
1030 #endif
1031                 if (reinit && (ifp->if_flags & IFF_RUNNING))
1032                         igb_init(adapter);
1033 #if 0
1034                 VLAN_CAPABILITIES(ifp);
1035 #endif
1036                 break;
1037             }
1038
1039         default:
1040                 error = ether_ioctl(ifp, command, data);
1041                 break;
1042         }
1043         IOCTL_DEBUGOUT("ioctl done");
1044
1045         return (error);
1046 }
1047
1048
1049 /*********************************************************************
1050  *  Init entry point
1051  *
1052  *  This routine is used in two ways. It is used by the stack as
1053  *  init entry point in network interface structure. It is also used
1054  *  by the driver as a hw/sw initialization routine to get to a
1055  *  consistent state.
1056  *
1058  **********************************************************************/
1059
1060 static void
1061 igb_init_locked(struct adapter *adapter)
1062 {
1063         struct ifnet    *ifp = adapter->ifp;
1064         device_t        dev = adapter->dev;
1065
1066         INIT_DEBUGOUT("igb_init: begin");
1067
1068         IGB_CORE_LOCK_ASSERT(adapter);
1069
1070         igb_disable_intr(adapter);
1071         callout_stop(&adapter->timer);
1072
1073         /* Get the latest mac address, User can use a LAA */
1074         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1075               ETHER_ADDR_LEN);
1076
1077         /* Put the address into the Receive Address Array */
1078         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1079
1080         igb_reset(adapter);
1081         igb_update_link_status(adapter);
1082
1083         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1084
1085         /* Set hardware offload abilities */
1086         ifp->if_hwassist = 0;
1087         if (ifp->if_capenable & IFCAP_TXCSUM) {
1088                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1089 #if __FreeBSD_version >= 800000
1090                 if (adapter->hw.mac.type == e1000_82576)
1091                         ifp->if_hwassist |= CSUM_SCTP;
1092 #endif
1093         }
1094
1095 #ifdef NET_TSO
1096         if (ifp->if_capenable & IFCAP_TSO4)
1097                 ifp->if_hwassist |= CSUM_TSO;
1098 #endif
1099
1100         /* Configure for OS presence */
1101         igb_init_manageability(adapter);
1102
1103         /* Prepare transmit descriptors and buffers */
1104         igb_setup_transmit_structures(adapter);
1105         igb_initialize_transmit_units(adapter);
1106
1107         /* Setup Multicast table */
1108         igb_set_multi(adapter);
1109
1110         /*
1111         ** Figure out the desired mbuf pool
1112         ** for doing jumbo/packetsplit
1113         */
1114         if (ifp->if_mtu > ETHERMTU)
1115                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1116         else
1117                 adapter->rx_mbuf_sz = MCLBYTES;
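             /*
              * That is, page-sized (typically 4 KB) MJUMPAGESIZE clusters
              * for jumbo frames versus standard 2 KB MCLBYTES clusters.
              */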
1118
1119         /* Prepare receive descriptors and buffers */
1120         if (igb_setup_receive_structures(adapter)) {
1121                 device_printf(dev, "Could not setup receive structures\n");
1122                 return;
1123         }
1124         igb_initialize_receive_units(adapter);
1125
1126         /* Don't lose promiscuous settings */
1127         igb_set_promisc(adapter);
1128
1129         ifp->if_flags |= IFF_RUNNING;
1130         ifp->if_flags &= ~IFF_OACTIVE;
1131
1132         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1133         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1134
1135         if (adapter->msix > 1) /* Set up queue routing */
1136                 igb_configure_queues(adapter);
1137
1138         /* Set up VLAN tag offload and filter */
1139         igb_setup_vlan_hw_support(adapter);
1140
1141         /* this clears any pending interrupts */
1142         E1000_READ_REG(&adapter->hw, E1000_ICR);
1143 #ifdef DEVICE_POLLING
1144         /*
1145          * Only enable interrupts if we are not polling, make sure
1146          * they are off otherwise.
1147          */
1148         if (ifp->if_flags & IFF_POLLING)
1149                 igb_disable_intr(adapter);
1150         else
1151 #endif /* DEVICE_POLLING */
1152         {
1153         igb_enable_intr(adapter);
1154         E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1155         }
1156
1157         /* Don't reset the phy next time init gets called */
1158         adapter->hw.phy.reset_disable = TRUE;
1159         INIT_DEBUGOUT("igb_init: end");
1160 }
1161
1162 static void
1163 igb_init(void *arg)
1164 {
1165         struct adapter *adapter = arg;
1166
1167         IGB_CORE_LOCK(adapter);
1168         igb_init_locked(adapter);
1169         IGB_CORE_UNLOCK(adapter);
1170 }
1171
1172
1173 static void
1174 igb_handle_rxtx(void *context, int pending)
1175 {
1176         struct adapter  *adapter = context;
1177         struct tx_ring  *txr = adapter->tx_rings;
1178         struct rx_ring  *rxr = adapter->rx_rings;
1179         struct ifnet    *ifp;
1180
1181         ifp = adapter->ifp;
1182
1183         if (ifp->if_flags & IFF_RUNNING) {
1184                 if (igb_rxeof(rxr, adapter->rx_process_limit))
1185                         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1186                 IGB_TX_LOCK(txr);
1187                 igb_txeof(txr);
1188
1189 #if __FreeBSD_version >= 800000
1190                 if (!drbr_empty(ifp, txr->br))
1191                         igb_mq_start_locked(ifp, txr, NULL);
1192 #else
1193                 if (!ifq_is_empty(&ifp->if_snd))
1194                         igb_start_locked(txr, ifp);
1195 #endif
1196                 IGB_TX_UNLOCK(txr);
1197         }
1198
1199         igb_enable_intr(adapter);
1200 }
1201
1202 static void
1203 igb_handle_que(void *context, int pending)
1204 {
1205         struct igb_queue *que = context;
1206         struct adapter *adapter = que->adapter;
1207         struct tx_ring *txr = que->txr;
1208         struct rx_ring *rxr = que->rxr;
1209         struct ifnet    *ifp = adapter->ifp;
1210         u32             loop = IGB_MAX_LOOP;
1211         bool            more;
1212
1213         /* RX first */
1214         do {
1215                 more = igb_rxeof(rxr, -1);
1216         } while (loop-- && more);
1217
1218         if (IGB_TX_TRYLOCK(txr)) {
1219                 loop = IGB_MAX_LOOP;
1220                 do {
1221                         more = igb_txeof(txr);
1222                 } while (loop-- && more);
1223 #if __FreeBSD_version >= 800000
1224                 igb_mq_start_locked(ifp, txr, NULL);
1225 #else
1226                 if (!ifq_is_empty(&ifp->if_snd))
1227                         igb_start_locked(txr, ifp);
1228 #endif
1229                 IGB_TX_UNLOCK(txr);
1230         }
1231
1232         /* Reenable this interrupt */
1233 #ifdef DEVICE_POLLING
1234         if ((ifp->if_flags & IFF_POLLING) == 0)
1235 #endif
1236                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1237 }
1238
1239 /* Deal with link in a sleepable context */
1240 static void
1241 igb_handle_link(void *context, int pending)
1242 {
1243         struct adapter *adapter = context;
1244
1245         adapter->hw.mac.get_link_status = 1;
1246         igb_update_link_status(adapter);
1247 }
1248
1249 /*********************************************************************
1250  *
1251  *  MSI/Legacy Deferred
1252  *  Interrupt Service routine  
1253  *
1254  *********************************************************************/
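     /*
      * Note: FILTER_STRAY and FILTER_HANDLED are defined empty here so
      * that the FreeBSD-style "return FILTER_*" statements in
      * igb_irq_fast(), which is a plain void handler in this port,
      * compile as bare returns.
      */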
1255 #define FILTER_STRAY
1256 #define FILTER_HANDLED
1257 static void
1258 igb_irq_fast(void *arg)
1259 {
1260         struct adapter  *adapter = arg;
1261         uint32_t        reg_icr;
1262
1263
1264         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1265
1266         /* Hot eject?  */
1267         if (reg_icr == 0xffffffff)
1268                 return FILTER_STRAY; 
1269
1270         /* Definitely not our interrupt.  */
1271         if (reg_icr == 0x0)
1272                 return FILTER_STRAY;
1273
1274         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1275                 return FILTER_STRAY;
1276
1277         /*
1278          * Mask interrupts until the taskqueue is finished running.  This is
1279          * cheap, just assume that it is needed.  This also works around the
1280          * MSI message reordering errata on certain systems.
1281          */
1282         igb_disable_intr(adapter);
1283         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1284
1285         /* Link status change */
1286         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1287                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1288
1289         if (reg_icr & E1000_ICR_RXO)
1290                 adapter->rx_overruns++;
1291         return FILTER_HANDLED;
1292 }
1293
1294 #ifdef DEVICE_POLLING
1295 /*********************************************************************
1296  *
1297  *  Legacy polling routine  
1298  *
1299  *********************************************************************/
1300 #if __FreeBSD_version >= 800000
1301 #define POLL_RETURN_COUNT(a) (a)
1302 static int
1303 #else
1304 #define POLL_RETURN_COUNT(a)
1305 static void
1306 #endif
1307 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1308 {
1309         struct adapter *adapter = ifp->if_softc;
1310         struct rx_ring  *rxr = adapter->rx_rings;
1311         struct tx_ring  *txr = adapter->tx_rings;
1312         u32             reg_icr, rx_done = 0;
1313         u32             loop = IGB_MAX_LOOP;
1314         bool            more;
1315
1316         IGB_CORE_LOCK(adapter);
1317         if ((ifp->if_flags & IFF_RUNNING) == 0) {
1318                 IGB_CORE_UNLOCK(adapter);
1319                 return POLL_RETURN_COUNT(rx_done);
1320         }
1321
1322         if (cmd == POLL_AND_CHECK_STATUS) {
1323                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1324                 /* Link status change */
1325                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1326                         taskqueue_enqueue(adapter->tq, &adapter->link_task);
1327
1328                 if (reg_icr & E1000_ICR_RXO)
1329                         adapter->rx_overruns++;
1330         }
1331         IGB_CORE_UNLOCK(adapter);
1332
1333         /* TODO: rx_count */
1334         rx_done = igb_rxeof(rxr, count) ? 1 : 0;
1335
1336         IGB_TX_LOCK(txr);
1337         do {
1338                 more = igb_txeof(txr);
1339         } while (loop-- && more);
1340 #if __FreeBSD_version >= 800000
1341         if (!drbr_empty(ifp, txr->br))
1342                 igb_mq_start_locked(ifp, txr, NULL);
1343 #else
1344         if (!ifq_is_empty(&ifp->if_snd))
1345                 igb_start_locked(txr, ifp);
1346 #endif
1347         IGB_TX_UNLOCK(txr);
1348         return POLL_RETURN_COUNT(rx_done);
1349 }
1350 #endif /* DEVICE_POLLING */
1351
1352 /*********************************************************************
1353  *
1354  *  MSIX Queue (TX/RX) Interrupt Service routine
1355  *
1356  **********************************************************************/
1357 static void
1358 igb_msix_que(void *arg)
1359 {
1360         struct igb_queue *que = arg;
1361         struct adapter *adapter = que->adapter;
1362         struct tx_ring *txr = que->txr;
1363         struct rx_ring *rxr = que->rxr;
1364         u32             newitr = 0;
1365         bool            more_tx, more_rx;
1366
1367         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1368         ++que->irqs;
1369
1370         IGB_TX_LOCK(txr);
1371         more_tx = igb_txeof(txr);
1372         IGB_TX_UNLOCK(txr);
1373
1374         more_rx = igb_rxeof(rxr, adapter->rx_process_limit);
1375
1376         if (igb_enable_aim == FALSE)
1377                 goto no_calc;
1378         /*
1379         ** Do Adaptive Interrupt Moderation:
1380         **  - Write out last calculated setting
1381         **  - Calculate based on average size over
1382         **    the last interval.
1383         */
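             /*
              * Worked example (illustrative): on a gigabit link with an
              * average of ~1000 bytes per packet, newitr = 1000 + 24 = 1024;
              * that is in the 300-1200 "mid range" below, so it becomes
              * 1024 / 3 = 341, and the 0x7FFC mask then yields 340.
              */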
1384         if (que->eitr_setting)
1385                 E1000_WRITE_REG(&adapter->hw,
1386                     E1000_EITR(que->msix), que->eitr_setting);
1387  
1388         que->eitr_setting = 0;
1389
1390         /* Idle, do nothing */
1391         if ((txr->bytes == 0) && (rxr->bytes == 0))
1392                 goto no_calc;
1393                                 
1394         /* Use half the default if the link is sub-gigabit */
1395         if (adapter->link_speed != 1000)
1396                 newitr = IGB_DEFAULT_ITR / 2;
1397         else {
1398                 if ((txr->bytes) && (txr->packets))
1399                         newitr = txr->bytes/txr->packets;
1400                 if ((rxr->bytes) && (rxr->packets))
1401                         newitr = max(newitr,
1402                             (rxr->bytes / rxr->packets));
1403                 newitr += 24; /* account for hardware frame, crc */
1404                 /* set an upper boundary */
1405                 newitr = min(newitr, 3000);
1406                 /* Be nice to the mid range */
1407                 if ((newitr > 300) && (newitr < 1200))
1408                         newitr = (newitr / 3);
1409                 else
1410                         newitr = (newitr / 2);
1411         }
1412         newitr &= 0x7FFC;  /* Mask invalid bits */
1413         if (adapter->hw.mac.type == e1000_82575)
1414                 newitr |= newitr << 16;
1415         else
1416                 newitr |= 0x8000000;
1417                  
1418         /* save for next interrupt */
1419         que->eitr_setting = newitr;
1420
1421         /* Reset state */
1422         txr->bytes = 0;
1423         txr->packets = 0;
1424         rxr->bytes = 0;
1425         rxr->packets = 0;
1426
1427 no_calc:
1428         /* Schedule a clean task if needed */
1429         if (more_tx || more_rx) 
1430                 taskqueue_enqueue(que->tq, &que->que_task);
1431         else
1432                 /* Reenable this interrupt */
1433                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1434         return;
1435 }
1436
1437
1438 /*********************************************************************
1439  *
1440  *  MSIX Link Interrupt Service routine
1441  *
1442  **********************************************************************/
1443
1444 static void
1445 igb_msix_link(void *arg)
1446 {
1447         struct adapter  *adapter = arg;
1448         u32             icr;
1449
1450         ++adapter->link_irq;
1451         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1452         if (!(icr & E1000_ICR_LSC))
1453                 goto spurious;
1454         taskqueue_enqueue(adapter->tq, &adapter->link_task);
1455
1456 spurious:
1457         /* Rearm */
1458         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1459         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1460         return;
1461 }
1462
1463
1464 /*********************************************************************
1465  *
1466  *  Media Ioctl callback
1467  *
1468  *  This routine is called whenever the user queries the status of
1469  *  the interface using ifconfig.
1470  *
1471  **********************************************************************/
1472 static void
1473 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1474 {
1475         struct adapter *adapter = ifp->if_softc;
1476         u_char fiber_type = IFM_1000_SX;
1477
1478         INIT_DEBUGOUT("igb_media_status: begin");
1479
1480         IGB_CORE_LOCK(adapter);
1481         igb_update_link_status(adapter);
1482
1483         ifmr->ifm_status = IFM_AVALID;
1484         ifmr->ifm_active = IFM_ETHER;
1485
1486         if (!adapter->link_active) {
1487                 IGB_CORE_UNLOCK(adapter);
1488                 return;
1489         }
1490
1491         ifmr->ifm_status |= IFM_ACTIVE;
1492
1493         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1494             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1495                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1496         else {
1497                 switch (adapter->link_speed) {
1498                 case 10:
1499                         ifmr->ifm_active |= IFM_10_T;
1500                         break;
1501                 case 100:
1502                         ifmr->ifm_active |= IFM_100_TX;
1503                         break;
1504                 case 1000:
1505                         ifmr->ifm_active |= IFM_1000_T;
1506                         break;
1507                 }
1508                 if (adapter->link_duplex == FULL_DUPLEX)
1509                         ifmr->ifm_active |= IFM_FDX;
1510                 else
1511                         ifmr->ifm_active |= IFM_HDX;
1512         }
1513         IGB_CORE_UNLOCK(adapter);
1514 }
1515
1516 /*********************************************************************
1517  *
1518  *  Media Ioctl callback
1519  *
1520  *  This routine is called when the user changes speed/duplex using
1521  *  the media/mediaopt options with ifconfig.
1522  *
1523  **********************************************************************/
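/*
 * Example usage (interface name hypothetical): force 100 Mbps full duplex,
 * then return to autonegotiation:
 *
 *   ifconfig igb0 media 100baseTX mediaopt full-duplex
 *   ifconfig igb0 media autoselect
 */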
1524 static int
1525 igb_media_change(struct ifnet *ifp)
1526 {
1527         struct adapter *adapter = ifp->if_softc;
1528         struct ifmedia  *ifm = &adapter->media;
1529
1530         INIT_DEBUGOUT("igb_media_change: begin");
1531
1532         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1533                 return (EINVAL);
1534
1535         IGB_CORE_LOCK(adapter);
1536         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1537         case IFM_AUTO:
1538                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1539                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1540                 break;
1541         case IFM_1000_LX:
1542         case IFM_1000_SX:
1543         case IFM_1000_T:
1544                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1545                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1546                 break;
1547         case IFM_100_TX:
1548                 adapter->hw.mac.autoneg = FALSE;
1549                 adapter->hw.phy.autoneg_advertised = 0;
1550                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1551                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1552                 else
1553                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1554                 break;
1555         case IFM_10_T:
1556                 adapter->hw.mac.autoneg = FALSE;
1557                 adapter->hw.phy.autoneg_advertised = 0;
1558                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1559                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1560                 else
1561                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1562                 break;
1563         default:
1564                 device_printf(adapter->dev, "Unsupported media type\n");
1565         }
1566
1567         /* As the speed/duplex settings may have changed we need to
1568          * reset the PHY.
1569          */
1570         adapter->hw.phy.reset_disable = FALSE;
1571
1572         igb_init_locked(adapter);
1573         IGB_CORE_UNLOCK(adapter);
1574
1575         return (0);
1576 }
1577
1578
1579 /*********************************************************************
1580  *
1581  *  This routine maps the mbufs to the Advanced TX descriptors
1582  *  used by the 82575 adapter.
1583  *  
1584  **********************************************************************/
1585
1586 static int
1587 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1588 {
1589         struct adapter          *adapter = txr->adapter;
1590         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1591         bus_dmamap_t            map;
1592         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1593         union e1000_adv_tx_desc *txd = NULL;
1594         struct mbuf             *m_head;
1595         u32                     olinfo_status = 0, cmd_type_len = 0;
1596         int                     nsegs, i, j, error, first, last = 0;
1597         u32                     hdrlen = 0;
1598
1599         m_head = *m_headp;
1600
1601
1602         /* Set basic descriptor constants */
1603         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1604         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1605         if (m_head->m_flags & M_VLANTAG)
1606                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1607
1608         /*
1609          * Force a cleanup if number of TX descriptors
1610          * available hits the threshold
1611          */
1612         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1613                 igb_txeof(txr);
1614                 /* Now do we at least have a minimal number free? */
1615                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1616                         txr->no_desc_avail++;
1617                         return (ENOBUFS);
1618                 }
1619         }
1620
1621         /*
1622          * Map the packet for DMA.
1623          *
1624          * Capture the first descriptor index,
1625          * this descriptor will have the index
1626          * of the EOP which is the only one that
1627          * now gets a DONE bit writeback.
1628          */
1629         first = txr->next_avail_desc;
1630         tx_buffer = &txr->tx_buffers[first];
1631         tx_buffer_mapped = tx_buffer;
1632         map = tx_buffer->map;
1633
1634         error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1635             *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1636
1637         if (error == EFBIG) {
1638                 struct mbuf *m;
1639
1640                 m = m_defrag(*m_headp, MB_DONTWAIT);
1641                 if (m == NULL) {
1642                         adapter->mbuf_defrag_failed++;
1643                         m_freem(*m_headp);
1644                         *m_headp = NULL;
1645                         return (ENOBUFS);
1646                 }
1647                 *m_headp = m;
1648
1649                 /* Try it again */
1650                 error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1651                     *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1652
1653                 if (error == ENOMEM) {
1654                         adapter->no_tx_dma_setup++;
1655                         return (error);
1656                 } else if (error != 0) {
1657                         adapter->no_tx_dma_setup++;
1658                         m_freem(*m_headp);
1659                         *m_headp = NULL;
1660                         return (error);
1661                 }
1662         } else if (error == ENOMEM) {
1663                 adapter->no_tx_dma_setup++;
1664                 return (error);
1665         } else if (error != 0) {
1666                 adapter->no_tx_dma_setup++;
1667                 m_freem(*m_headp);
1668                 *m_headp = NULL;
1669                 return (error);
1670         }
1671
1672         /* Check again to be sure we have enough descriptors */
1673         if (nsegs > (txr->tx_avail - 2)) {
1674                 txr->no_desc_avail++;
1675                 bus_dmamap_unload(txr->txtag, map);
1676                 return (ENOBUFS);
1677         }
1678         m_head = *m_headp;
1679
1680         /*
1681          * Set up the context descriptor:
1682          * used when any hardware offload is done.
1683          * This includes CSUM, VLAN, and TSO. It
1684          * will use the first descriptor.
1685          */
1686 #ifdef NET_TSO
1687         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1688                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1689                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1690                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1691                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1692                 } else
1693                         return (ENXIO); 
1694         } else
1695 #endif
1696                if (igb_tx_ctx_setup(txr, m_head))
1697                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1698
1699         /* Calculate payload length */
1700         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1701             << E1000_ADVTXD_PAYLEN_SHIFT);
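        /*
         * For a non-TSO frame hdrlen stays 0, so the PAYLEN field simply
         * carries the whole frame length, e.g. 1514 for a full-sized
         * untagged Ethernet frame.
         */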
1702
1703         /* 82575 needs the queue index added */
1704         if (adapter->hw.mac.type == e1000_82575)
1705                 olinfo_status |= txr->me << 4;
1706
1707         /* Set up our transmit descriptors */
1708         i = txr->next_avail_desc;
1709         for (j = 0; j < nsegs; j++) {
1710                 bus_size_t seg_len;
1711                 bus_addr_t seg_addr;
1712
1713                 tx_buffer = &txr->tx_buffers[i];
1714                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1715                 seg_addr = segs[j].ds_addr;
1716                 seg_len  = segs[j].ds_len;
1717
1718                 txd->read.buffer_addr = htole64(seg_addr);
1719                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1720                 txd->read.olinfo_status = htole32(olinfo_status);
1721                 last = i;
1722                 if (++i == adapter->num_tx_desc)
1723                         i = 0;
1724                 tx_buffer->m_head = NULL;
1725                 tx_buffer->next_eop = -1;
1726         }
1727
1728         txr->next_avail_desc = i;
1729         txr->tx_avail -= nsegs;
1730
1731         tx_buffer->m_head = m_head;
1732         tx_buffer_mapped->map = tx_buffer->map;
1733         tx_buffer->map = map;
1734         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1735
1736         /*
1737          * Last Descriptor of Packet
1738          * needs End Of Packet (EOP)
1739          * and Report Status (RS)
1740          */
1741         txd->read.cmd_type_len |=
1742             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1743         /*
1744          * Keep track in the first buffer which
1745          * descriptor will be written back
1746          */
1747         tx_buffer = &txr->tx_buffers[first];
1748         tx_buffer->next_eop = last;
1749         txr->watchdog_time = ticks;
1750
1751         /*
1752          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1753          * that this frame is available to transmit.
1754          */
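        /*
         * Note the ordering below: the descriptor ring is synced to memory
         * first, so the hardware cannot fetch stale descriptors once the
         * tail register write makes the new slots visible to it.
         */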
1755         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1756             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1757         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1758         ++txr->tx_packets;
1759
1760         return (0);
1761
1762 }
1763
1764 static void
1765 igb_set_promisc(struct adapter *adapter)
1766 {
1767         struct ifnet    *ifp = adapter->ifp;
1768         uint32_t        reg_rctl;
1769
1770         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1771
1772         if (ifp->if_flags & IFF_PROMISC) {
1773                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1774                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1775         } else if (ifp->if_flags & IFF_ALLMULTI) {
1776                 reg_rctl |= E1000_RCTL_MPE;
1777                 reg_rctl &= ~E1000_RCTL_UPE;
1778                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1779         }
1780 }
1781
1782 static void
1783 igb_disable_promisc(struct adapter *adapter)
1784 {
1785         uint32_t        reg_rctl;
1786
1787         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1788
1789         reg_rctl &=  (~E1000_RCTL_UPE);
1790         reg_rctl &=  (~E1000_RCTL_MPE);
1791         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1792 }
1793
1794
1795 /*********************************************************************
1796  *  Multicast Update
1797  *
1798  *  This routine is called whenever multicast address list is updated.
1799  *
1800  **********************************************************************/
1801
1802 static void
1803 igb_set_multi(struct adapter *adapter)
1804 {
1805         struct ifnet    *ifp = adapter->ifp;
1806         struct ifmultiaddr *ifma;
1807         u32 reg_rctl = 0;
1808         static u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1809
1810         int mcnt = 0;
1811
1812         IOCTL_DEBUGOUT("igb_set_multi: begin");
1813
1814 #if 0
1815 #if __FreeBSD_version < 800000
1816         IF_ADDR_LOCK(ifp);
1817 #else
1818         if_maddr_rlock(ifp);
1819 #endif
1820 #endif
1821
1822         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1823                 if (ifma->ifma_addr->sa_family != AF_LINK)
1824                         continue;
1825
1826                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1827                         break;
1828
1829                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1830                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1831                 mcnt++;
1832         }
1833 #if 0
1834 #if __FreeBSD_version < 800000
1835         IF_ADDR_UNLOCK(ifp);
1836 #else
1837         if_maddr_runlock(ifp);
1838 #endif
1839 #endif
1840
1841         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1842                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1843                 reg_rctl |= E1000_RCTL_MPE;
1844                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1845         } else {
1846                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1847         }
1848 }
1849
1850
1851 /*********************************************************************
1852  *  Timer routine:
1853  *      This routine checks for link status,
1854  *      updates statistics, and does the watchdog.
1855  *
1856  **********************************************************************/
1857
1858 static void
1859 igb_local_timer(void *arg)
1860 {
1861         struct adapter          *adapter = arg;
1862
1863         IGB_CORE_LOCK(adapter);
1864
1865         struct ifnet            *ifp = adapter->ifp;
1866         device_t                dev = adapter->dev;
1867         struct tx_ring          *txr = adapter->tx_rings;
1868
1869
1870         IGB_CORE_LOCK_ASSERT(adapter);
1871
1872         igb_update_link_status(adapter);
1873         igb_update_stats_counters(adapter);
1874
1875         if (igb_display_debug_stats && ifp->if_flags & IFF_RUNNING)
1876                 igb_print_hw_stats(adapter);
1877
1878         /*
1879         ** Watchdog: check for time since any descriptor was cleaned
1880         */
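        /*
        ** ticks advances hz times per second, so a queue only times out
        ** here after no descriptor has been cleaned for IGB_WATCHDOG/hz
        ** seconds of wall-clock time.
        */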
1881         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1882                 if (txr->watchdog_check == FALSE)
1883                         continue;
1884                 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1885                         goto timeout;
1886         }
1887
1888         /* Trigger an RX interrupt on all queues */
1889 #ifdef DEVICE_POLLING
1890         if ((ifp->if_flags & IFF_POLLING) == 0)
1891 #endif
1892                 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1893         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1894         IGB_CORE_UNLOCK(adapter);
1895         return;
1896
1897 timeout:
1898         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1899         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1900             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1901             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1902         device_printf(dev, "TX(%d) desc avail = %d, "
1903             "Next TX to Clean = %d\n",
1904             txr->me, txr->tx_avail, txr->next_to_clean);
1905         adapter->ifp->if_flags &= ~IFF_RUNNING;
1906         adapter->watchdog_events++;
1907         igb_init_locked(adapter);
1908         IGB_CORE_UNLOCK(adapter);
1909 }
1910
1911 static void
1912 igb_update_link_status(struct adapter *adapter)
1913 {
1914         struct e1000_hw *hw = &adapter->hw;
1915         struct ifnet *ifp = adapter->ifp;
1916         device_t dev = adapter->dev;
1917         struct tx_ring *txr = adapter->tx_rings;
1918         u32 link_check = 0;
1919
1920         /* Get the cached link value or read for real */
1921         switch (hw->phy.media_type) {
1922         case e1000_media_type_copper:
1923                 if (hw->mac.get_link_status) {
1924                         /* Do the work to read phy */
1925                         e1000_check_for_link(hw);
1926                         link_check = !hw->mac.get_link_status;
1927                 } else
1928                         link_check = TRUE;
1929                 break;
1930         case e1000_media_type_fiber:
1931                 e1000_check_for_link(hw);
1932                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1933                                  E1000_STATUS_LU);
1934                 break;
1935         case e1000_media_type_internal_serdes:
1936                 e1000_check_for_link(hw);
1937                 link_check = adapter->hw.mac.serdes_has_link;
1938                 break;
1939         default:
1940         case e1000_media_type_unknown:
1941                 break;
1942         }
1943
1944         /* Now we check if a transition has happened */
1945         if (link_check && (adapter->link_active == 0)) {
1946                 e1000_get_speed_and_duplex(&adapter->hw, 
1947                     &adapter->link_speed, &adapter->link_duplex);
1948                 if (bootverbose)
1949                         device_printf(dev, "Link is up %d Mbps %s\n",
1950                             adapter->link_speed,
1951                             ((adapter->link_duplex == FULL_DUPLEX) ?
1952                             "Full Duplex" : "Half Duplex"));
1953                 adapter->link_active = 1;
1954                 ifp->if_baudrate = adapter->link_speed * 1000000;
1955                 ifp->if_link_state = LINK_STATE_UP;
1956                 if_link_state_change(ifp);
1957         } else if (!link_check && (adapter->link_active == 1)) {
1958                 ifp->if_baudrate = adapter->link_speed = 0;
1959                 adapter->link_duplex = 0;
1960                 if (bootverbose)
1961                         device_printf(dev, "Link is Down\n");
1962                 adapter->link_active = 0;
1963                 ifp->if_link_state = LINK_STATE_DOWN;
1964                 if_link_state_change(ifp);
1965                 /* Turn off watchdogs */
1966                 for (int i = 0; i < adapter->num_queues; i++, txr++)
1967                         txr->watchdog_check = FALSE;
1968         }
1969 }
1970
1971 /*********************************************************************
1972  *
1973  *  This routine disables all traffic on the adapter by issuing a
1974  *  global reset on the MAC and deallocates TX/RX buffers.
1975  *
1976  **********************************************************************/
1977
1978 static void
1979 igb_stop(void *arg)
1980 {
1981         struct adapter  *adapter = arg;
1982         struct ifnet    *ifp = adapter->ifp;
1983         struct tx_ring *txr = adapter->tx_rings;
1984
1985         IGB_CORE_LOCK_ASSERT(adapter);
1986
1987         INIT_DEBUGOUT("igb_stop: begin");
1988
1989         igb_disable_intr(adapter);
1990
1991         callout_stop(&adapter->timer);
1992
1993         /* Tell the stack that the interface is no longer active */
1994         ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1995
1996         /* Unarm watchdog timer. */
1997         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1998                 IGB_TX_LOCK(txr);
1999                 txr->watchdog_check = FALSE;
2000                 IGB_TX_UNLOCK(txr);
2001         }
2002
2003         e1000_reset_hw(&adapter->hw);
2004         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2005 }
2006
2007
2008 /*********************************************************************
2009  *
2010  *  Determine hardware revision.
2011  *
2012  **********************************************************************/
2013 static void
2014 igb_identify_hardware(struct adapter *adapter)
2015 {
2016         device_t dev = adapter->dev;
2017
2018         /* Make sure our PCI config space has the necessary stuff set */
2019         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2020         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2021             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2022                 device_printf(dev, "Memory Access and/or Bus Master bits "
2023                     "were not set!\n");
2024                 adapter->hw.bus.pci_cmd_word |=
2025                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2026                 pci_write_config(dev, PCIR_COMMAND,
2027                     adapter->hw.bus.pci_cmd_word, 2);
2028         }
2029
2030         /* Save off the information about this board */
2031         adapter->hw.vendor_id = pci_get_vendor(dev);
2032         adapter->hw.device_id = pci_get_device(dev);
2033         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2034         adapter->hw.subsystem_vendor_id =
2035             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2036         adapter->hw.subsystem_device_id =
2037             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2038
2039         /* Do Shared Code Init and Setup */
2040         if (e1000_set_mac_type(&adapter->hw)) {
2041                 device_printf(dev, "Setup init failure\n");
2042                 return;
2043         }
2044 }
2045
2046 static int
2047 igb_allocate_pci_resources(struct adapter *adapter)
2048 {
2049         device_t        dev = adapter->dev;
2050         int             rid;
2051
2052         rid = PCIR_BAR(0);
2053         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2054             &rid, RF_ACTIVE);
2055         if (adapter->pci_mem == NULL) {
2056                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2057                 return (ENXIO);
2058         }
2059         adapter->osdep.mem_bus_space_tag =
2060             rman_get_bustag(adapter->pci_mem);
2061         adapter->osdep.mem_bus_space_handle =
2062             rman_get_bushandle(adapter->pci_mem);
2063         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2064
2065         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2066
2067         /* This will setup either MSI/X or MSI */
2068         adapter->msix = igb_setup_msix(adapter);
2069         adapter->hw.back = &adapter->osdep;
2070
2071         return (0);
2072 }
2073
2074 /*********************************************************************
2075  *
2076  *  Setup the Legacy or MSI Interrupt handler
2077  *
2078  **********************************************************************/
2079 static int
2080 igb_allocate_legacy(struct adapter *adapter)
2081 {
2082         device_t dev = adapter->dev;
2083         int error, rid = 0;
2084
2085         /* Turn off all interrupts */
2086         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2087
2088 #if 0
2089         /* MSI RID is 1 */
2090         if (adapter->msix == 1)
2091                 rid = 1;
2092 #endif
2093         rid = 0;
2094         /* We allocate a single interrupt resource */
2095         adapter->res = bus_alloc_resource_any(dev,
2096             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2097         if (adapter->res == NULL) {
2098                 device_printf(dev, "Unable to allocate bus resource: "
2099                     "interrupt\n");
2100                 return (ENXIO);
2101         }
2102
2103         /*
2104          * Try allocating a fast interrupt and the associated deferred
2105          * processing contexts.
2106          */
2107         TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2108         /* Make tasklet for deferred link handling */
2109         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2110         adapter->tq = taskqueue_create("igb_taskq", M_INTWAIT,
2111             taskqueue_thread_enqueue, &adapter->tq);
2112         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s taskq",
2113             device_get_nameunit(adapter->dev));
2114         if ((error = bus_setup_intr(dev, adapter->res,
2115             /*INTR_TYPE_NET |*/ INTR_MPSAFE, igb_irq_fast,
2116             adapter, &adapter->tag, NULL)) != 0) {
2117                 device_printf(dev, "Failed to register fast interrupt "
2118                             "handler: %d\n", error);
2119                 taskqueue_free(adapter->tq);
2120                 adapter->tq = NULL;
2121                 return (error);
2122         }
2123
2124         return (0);
2125 }
2126
2127
2128 /*********************************************************************
2129  *
2130  *  Setup the MSIX Queue Interrupt handlers: 
2131  *
2132  **********************************************************************/
2133 static int
2134 igb_allocate_msix(struct adapter *adapter)
2135 {
2136         device_t                dev = adapter->dev;
2137         struct igb_queue        *que = adapter->queues;
2138         int                     error, rid, vector = 0;
2139
2140
2141         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2142                 rid = vector + 1;
2143                 que->res = bus_alloc_resource_any(dev,
2144                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2145                 if (que->res == NULL) {
2146                         device_printf(dev,
2147                             "Unable to allocate bus resource: "
2148                             "MSIX Queue Interrupt\n");
2149                         return (ENXIO);
2150                 }
2151                 error = bus_setup_intr(dev, que->res,
2152                     /*INTR_TYPE_NET |*/ INTR_MPSAFE, 
2153                     igb_msix_que, que, &que->tag, NULL);
2154                 if (error) {
2155                         que->res = NULL;
2156                         device_printf(dev, "Failed to register Queue handler");
2157                         return (error);
2158                 }
2159                 que->msix = vector;
2160                 if (adapter->hw.mac.type == e1000_82575)
2161                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2162                 else
2163                         que->eims = 1 << vector;
2164                 /*
2165                 ** Bind the msix vector, and thus the
2166                 ** rings to the corresponding cpu.
2167                 */
2168 #if 0
2169                 if (adapter->num_queues > 1)
2170                         bus_bind_intr(dev, que->res, i);
2171 #endif
2172                 /* Make tasklet for deferred handling */
2173                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2174                 que->tq = taskqueue_create("igb_que", M_INTWAIT,
2175                     taskqueue_thread_enqueue, &que->tq);
2176                 taskqueue_start_threads(&que->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s que",
2177                     device_get_nameunit(adapter->dev));
2178         }
2179
2180         /* And Link */
2181         rid = vector + 1;
2182         adapter->res = bus_alloc_resource_any(dev,
2183             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2184         if (adapter->res == NULL) {
2185                 device_printf(dev,
2186                     "Unable to allocate bus resource: "
2187                     "MSIX Link Interrupt\n");
2188                 return (ENXIO);
2189         }
2190         if ((error = bus_setup_intr(dev, adapter->res,
2191             /*INTR_TYPE_NET |*/ INTR_MPSAFE,
2192             igb_msix_link, adapter, &adapter->tag, NULL)) != 0) {
2193                 device_printf(dev, "Failed to register Link handler");
2194                 return (error);
2195         }
2196         adapter->linkvec = vector;
2197
2198         /* Make tasklet for deferred handling */
2199         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2200         adapter->tq = taskqueue_create("igb_link", M_INTWAIT,
2201             taskqueue_thread_enqueue, &adapter->tq);
2202         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s link",
2203             device_get_nameunit(adapter->dev));
2204
2205         return (0);
2206 }
2207
2208
2209 static void
2210 igb_configure_queues(struct adapter *adapter)
2211 {
2212         struct  e1000_hw        *hw = &adapter->hw;
2213         struct  igb_queue       *que;
2214         u32                     tmp, ivar = 0;
2215         u32                     newitr = IGB_DEFAULT_ITR;
2216
2217         /* First turn on RSS capability */
2218         if (adapter->hw.mac.type > e1000_82575)
2219                 E1000_WRITE_REG(hw, E1000_GPIE,
2220                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2221                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2222
2223         /* Turn on MSIX */
2224         switch (adapter->hw.mac.type) {
2225         case e1000_82580:
2226                 /* RX entries */
2227                 for (int i = 0; i < adapter->num_queues; i++) {
2228                         u32 index = i >> 1;
2229                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2230                         que = &adapter->queues[i];
2231                         if (i & 1) {
2232                                 ivar &= 0xFF00FFFF;
2233                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2234                         } else {
2235                                 ivar &= 0xFFFFFF00;
2236                                 ivar |= que->msix | E1000_IVAR_VALID;
2237                         }
2238                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2239                 }
2240                 /* TX entries */
2241                 for (int i = 0; i < adapter->num_queues; i++) {
2242                         u32 index = i >> 1;
2243                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2244                         que = &adapter->queues[i];
2245                         if (i & 1) {
2246                                 ivar &= 0x00FFFFFF;
2247                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2248                         } else {
2249                                 ivar &= 0xFFFF00FF;
2250                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2251                         }
2252                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2253                         adapter->eims_mask |= que->eims;
2254                 }
2255
2256                 /* And for the link interrupt */
2257                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2258                 adapter->link_mask = 1 << adapter->linkvec;
2259                 adapter->eims_mask |= adapter->link_mask;
2260                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2261                 break;
2262         case e1000_82576:
2263                 /* RX entries */
2264                 for (int i = 0; i < adapter->num_queues; i++) {
2265                         u32 index = i & 0x7; /* Each IVAR has two entries */
2266                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2267                         que = &adapter->queues[i];
2268                         if (i < 8) {
2269                                 ivar &= 0xFFFFFF00;
2270                                 ivar |= que->msix | E1000_IVAR_VALID;
2271                         } else {
2272                                 ivar &= 0xFF00FFFF;
2273                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2274                         }
2275                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2276                         adapter->eims_mask |= que->eims;
2277                 }
2278                 /* TX entries */
2279                 for (int i = 0; i < adapter->num_queues; i++) {
2280                         u32 index = i & 0x7; /* Each IVAR has two entries */
2281                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2282                         que = &adapter->queues[i];
2283                         if (i < 8) {
2284                                 ivar &= 0xFFFF00FF;
2285                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2286                         } else {
2287                                 ivar &= 0x00FFFFFF;
2288                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2289                         }
2290                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2291                         adapter->eims_mask |= que->eims;
2292                 }
2293
2294                 /* And for the link interrupt */
2295                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2296                 adapter->link_mask = 1 << adapter->linkvec;
2297                 adapter->eims_mask |= adapter->link_mask;
2298                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2299                 break;
2300
2301         case e1000_82575:
2302                 /* enable MSI-X support*/
2303                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2304                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2305                 /* Auto-Mask interrupts upon ICR read. */
2306                 tmp |= E1000_CTRL_EXT_EIAME;
2307                 tmp |= E1000_CTRL_EXT_IRCA;
2308                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2309
2310                 /* Queues */
2311                 for (int i = 0; i < adapter->num_queues; i++) {
2312                         que = &adapter->queues[i];
2313                         tmp = E1000_EICR_RX_QUEUE0 << i;
2314                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2315                         que->eims = tmp;
2316                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2317                             i, que->eims);
2318                         adapter->eims_mask |= que->eims;
2319                 }
2320
2321                 /* Link */
2322                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2323                     E1000_EIMS_OTHER);
2324                 adapter->link_mask |= E1000_EIMS_OTHER;
2325                 adapter->eims_mask |= adapter->link_mask;
2326         default:
2327                 break;
2328         }
2329
2330         /* Set the starting interrupt rate */
2331         if (hw->mac.type == e1000_82575)
2332                 newitr |= newitr << 16;
2333         else
2334                 newitr |= 0x8000000;
2335
2336         for (int i = 0; i < adapter->num_queues; i++) {
2337                 que = &adapter->queues[i];
2338                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2339         }
2340
2341         return;
2342 }
2343
2344
2345 static void
2346 igb_free_pci_resources(struct adapter *adapter)
2347 {
2348         struct          igb_queue *que = adapter->queues;
2349         device_t        dev = adapter->dev;
2350         int             rid;
2351
2352         /*
2353         ** There is a slight possibility of a failure mode
2354         ** in attach that will result in entering this function
2355         ** before interrupt resources have been initialized, and
2356         ** in that case we do not want to execute the loops below.
2357         ** We can detect this reliably by the state of the adapter
2358         ** res pointer.
2359         */
2360         if (adapter->res == NULL)
2361                 goto mem;
2362
2363         /*
2364          * First release all the interrupt resources:
2365          */
2366         for (int i = 0; i < adapter->num_queues; i++, que++) {
2367                 rid = que->msix + 1;
2368                 if (que->tag != NULL) {
2369                         bus_teardown_intr(dev, que->res, que->tag);
2370                         que->tag = NULL;
2371                 }
2372                 if (que->res != NULL)
2373                         bus_release_resource(dev,
2374                             SYS_RES_IRQ, rid, que->res);
2375         }
2376
2377         /* Clean the Legacy or Link interrupt last */
2378         if (adapter->linkvec) /* we are doing MSIX */
2379                 rid = adapter->linkvec + 1;
2380         else
2381                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2382
2383         if (adapter->tag != NULL) {
2384                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2385                 adapter->tag = NULL;
2386         }
2387         if (adapter->res != NULL)
2388                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2389
2390 mem:
2391         if (adapter->msix)
2392                 pci_release_msi(dev);
2393
2394         if (adapter->msix_mem != NULL)
2395                 bus_release_resource(dev, SYS_RES_MEMORY,
2396                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2397
2398         if (adapter->pci_mem != NULL)
2399                 bus_release_resource(dev, SYS_RES_MEMORY,
2400                     PCIR_BAR(0), adapter->pci_mem);
2401
2402 }
2403
2404 /*
2405  * Setup Either MSI/X or MSI
2406  */
2407 static int
2408 igb_setup_msix(struct adapter *adapter)
2409 {
2410 #ifdef OLD_MSI
2411         device_t dev = adapter->dev;
2412         int rid, want, queues, msgs;
2413
2414         /* tuneable override */
2415         if (igb_enable_msix == 0)
2416                 goto msi;
2417
2418         /* First try MSI/X */
2419         rid = PCIR_BAR(IGB_MSIX_BAR);
2420         adapter->msix_mem = bus_alloc_resource_any(dev,
2421             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2422         if (!adapter->msix_mem) {
2423                 /* May not be enabled */
2424                 device_printf(adapter->dev,
2425                     "Unable to map MSIX table \n");
2426                 goto msi;
2427         }
2428
2429         msgs = pci_msix_count(dev); 
2430         if (msgs == 0) { /* system has msix disabled */
2431                 bus_release_resource(dev, SYS_RES_MEMORY,
2432                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2433                 adapter->msix_mem = NULL;
2434                 goto msi;
2435         }
2436
2437         /* Figure out a reasonable auto config value */
2438         queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2439
2440         /* Can have max of 4 queues on 82575 */
2441         if (adapter->hw.mac.type == e1000_82575) {
2442                 if (queues > 4)
2443                         queues = 4;
2444                 if (igb_num_queues > 4)
2445                         igb_num_queues = 4;
2446         }
2447
2448         if (igb_num_queues == 0)
2449                 igb_num_queues = queues;
2450
2451         /*
2452         ** One vector (RX/TX pair) per queue
2453         ** plus an additional for Link interrupt
2454         */
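        /*
        ** e.g. a 4-queue configuration wants 4 + 1 = 5 vectors; if the
        ** device advertises fewer than that we report the problem and
        ** bail out below rather than silently running with fewer queues.
        */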
2455         want = igb_num_queues + 1;
2456         if (msgs >= want)
2457                 msgs = want;
2458         else {
2459                 device_printf(adapter->dev,
2460                     "MSIX Configuration Problem, "
2461                     "%d vectors configured, but %d vectors wanted!\n",
2462                     msgs, want);
2463                 return (ENXIO);
2464         }
2465         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2466                 device_printf(adapter->dev,
2467                     "Using MSIX interrupts with %d vectors\n", msgs);
2468                 adapter->num_queues = igb_num_queues;
2469                 return (msgs);
2470         }
2471 msi:
2472         msgs = pci_msi_count(dev);
2473         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2474                 device_printf(adapter->dev,"Using MSI interrupt\n");
2475         return (msgs);
2476 #else
2477         return 0;
2478 #endif
2479 }
2480
2481 /*********************************************************************
2482  *
2483  *  Set up a fresh starting state
2484  *
2485  **********************************************************************/
2486 static void
2487 igb_reset(struct adapter *adapter)
2488 {
2489         device_t        dev = adapter->dev;
2490         struct e1000_hw *hw = &adapter->hw;
2491         struct e1000_fc_info *fc = &hw->fc;
2492         struct ifnet    *ifp = adapter->ifp;
2493         u32             pba = 0;
2494         u16             hwm;
2495
2496         INIT_DEBUGOUT("igb_reset: begin");
2497
2498         /* Let the firmware know the OS is in control */
2499         igb_get_hw_control(adapter);
2500
2501         /*
2502          * Packet Buffer Allocation (PBA)
2503          * Writing PBA sets the receive portion of the buffer
2504          * the remainder is used for the transmit buffer.
2505          */
2506         switch (hw->mac.type) {
2507         case e1000_82575:
2508                 pba = E1000_PBA_32K;
2509                 break;
2510         case e1000_82576:
2511                 pba = E1000_PBA_64K;
2512                 break;
2513         case e1000_82580:
2514                 pba = E1000_PBA_35K;
2515         default:
2516                 break;
2517         }
2518
2519         /* Special needs in case of Jumbo frames */
2520         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2521                 u32 tx_space, min_tx, min_rx;
2522                 pba = E1000_READ_REG(hw, E1000_PBA);
2523                 tx_space = pba >> 16;
2524                 pba &= 0xffff;
2525                 min_tx = (adapter->max_frame_size +
2526                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2527                 min_tx = roundup2(min_tx, 1024);
2528                 min_tx >>= 10;
2529                 min_rx = adapter->max_frame_size;
2530                 min_rx = roundup2(min_rx, 1024);
2531                 min_rx >>= 10;
2532                 if (tx_space < min_tx &&
2533                     ((min_tx - tx_space) < pba)) {
2534                         pba = pba - (min_tx - tx_space);
2535                         /*
2536                          * if short on rx space, rx wins
2537                          * and must trump tx adjustment
2538                          */
2539                         if (pba < min_rx)
2540                                 pba = min_rx;
2541                 }
2542                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2543         }
2544
2545         INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2546
2547         /*
2548          * These parameters control the automatic generation (Tx) and
2549          * response (Rx) to Ethernet PAUSE frames.
2550          * - High water mark should allow for at least two frames to be
2551          *   received after sending an XOFF.
2552          * - Low water mark works best when it is very near the high water mark.
2553          *   This allows the receiver to restart by sending XON when it has
2554          *   drained a bit.
2555          */
2556         hwm = min(((pba << 10) * 9 / 10),
2557             ((pba << 10) - 2 * adapter->max_frame_size));
2558
2559         if (hw->mac.type < e1000_82576) {
2560                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2561                 fc->low_water = fc->high_water - 8;
2562         } else {
2563                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2564                 fc->low_water = fc->high_water - 16;
2565         }
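        /*
         * Worked example, assuming a standard 1500-byte MTU (so a
         * max_frame_size of roughly 1522 bytes) and a 32 KB receive PBA:
         * pba << 10 = 32768, hwm = min(29491, 32768 - 2 * 1522) = 29491,
         * which on an 82575 gives high_water = 29488 (8-byte granularity)
         * and low_water = 29480.
         */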
2566
2567         fc->pause_time = IGB_FC_PAUSE_TIME;
2568         fc->send_xon = TRUE;
2569
2570         /* Set Flow control, use the tunable value if sane */
2571         if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2572                 fc->requested_mode = igb_fc_setting;
2573         else
2574                 fc->requested_mode = e1000_fc_none;
2575
2576         fc->current_mode = fc->requested_mode;
2577
2578         /* Issue a global reset */
2579         e1000_reset_hw(hw);
2580         E1000_WRITE_REG(hw, E1000_WUC, 0);
2581
2582         if (e1000_init_hw(hw) < 0)
2583                 device_printf(dev, "Hardware Initialization Failed\n");
2584
2585         if (hw->mac.type == e1000_82580) {
2586                 u32 reg;
2587
2588                 hwm = (pba << 10) - (2 * adapter->max_frame_size);
2589                 /*
2590                  * 0x80000000 - enable DMA COAL
2591                  * 0x10000000 - use L0s as low power
2592                  * 0x20000000 - use L1 as low power
2593                  * X << 16 - exit dma coal when rx data exceeds X kB
2594                  * Y - upper limit to stay in dma coal in units of 32usecs
2595                  */
2596                 E1000_WRITE_REG(hw, E1000_DMACR,
2597                     0xA0000006 | ((hwm << 6) & 0x00FF0000));
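                /*
                 * Decoding the constant against the table above:
                 * 0xA0000006 = 0x80000000 (enable DMA coalescing)
                 *            | 0x20000000 (use L1 as the low power state)
                 *            | 6 (stay in coalescing at most 6 * 32 usecs);
                 * (hwm << 6) & 0x00FF0000 places the rx-data exit
                 * threshold (in kB) into bits 23:16.
                 */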
2598
2599                 /* set hwm to PBA -  2 * max frame size */
2600                 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2601                 /*
2602                  * This sets the time to wait before requesting transition to
2603                  * low power state to number of usecs needed to receive 1 512
2604                  * byte frame at gigabit line rate
2605                  */
2606                 E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
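                /*
                 * 512 bytes is 4096 bits, which takes roughly 4.1 usecs
                 * at 1 Gb/s, hence the value 4 written above.
                 */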
2607
2608                 /* free space in tx packet buffer to wake from DMA coal */
2609                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2610                     (20480 - (2 * adapter->max_frame_size)) >> 6);
2611
2612                 /* make low power state decision controlled by DMA coal */
2613                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2614                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2615                     reg | E1000_PCIEMISC_LX_DECISION);
2616         }
2617
2618         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2619         e1000_get_phy_info(hw);
2620         e1000_check_for_link(hw);
2621         return;
2622 }
2623
2624 /*********************************************************************
2625  *
2626  *  Setup networking device structure and register an interface.
2627  *
2628  **********************************************************************/
2629 static void
2630 igb_setup_interface(device_t dev, struct adapter *adapter)
2631 {
2632         struct ifnet   *ifp;
2633
2634         INIT_DEBUGOUT("igb_setup_interface: begin");
2635
2636         ifp = adapter->ifp = &adapter->arpcom.ac_if;
2637         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2638         ifp->if_mtu = ETHERMTU;
2639         ifp->if_init =  igb_init;
2640         ifp->if_softc = adapter;
2641         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2642         ifp->if_ioctl = igb_ioctl;
2643         ifp->if_start = igb_start;
2644 #ifdef DEVICE_POLLING
2645         ifp->if_poll = igb_poll;
2646 #endif
2647 #if __FreeBSD_version >= 800000
2648         ifp->if_transmit = igb_mq_start;
2649         ifp->if_qflush = igb_qflush;
2650 #endif
2651         ifq_set_maxlen(&ifp->if_snd, adapter->num_tx_desc - 1);
2652         ifq_set_ready(&ifp->if_snd);
2653
2654         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2655
2656         ifp->if_capabilities = ifp->if_capenable = 0;
2657
2658         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2659 #ifdef NET_TSO
2660         ifp->if_capabilities |= IFCAP_TSO4;
2661 #endif
2662         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2663 #ifdef NET_LRO
2664         if (igb_header_split)
2665                 ifp->if_capabilities |= IFCAP_LRO;
2666 #endif
2667
2668         ifp->if_capenable = ifp->if_capabilities;
2669
2670         /*
2671          * Tell the upper layer(s) we support long frames.
2672          */
2673         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2674         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2675         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2676
2677         /*
2678          * Specify the media types supported by this adapter and register
2679          * callbacks to update media and link information
2680          */
2681         ifmedia_init(&adapter->media, IFM_IMASK,
2682             igb_media_change, igb_media_status);
2683         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2684             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2685                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2686                             0, NULL);
2687                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2688         } else {
2689                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2690                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2691                             0, NULL);
2692                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2693                             0, NULL);
2694                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2695                             0, NULL);
2696                 if (adapter->hw.phy.type != e1000_phy_ife) {
2697                         ifmedia_add(&adapter->media,
2698                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2699                         ifmedia_add(&adapter->media,
2700                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2701                 }
2702         }
2703         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2704         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2705 }
2706
2707
2708 /*
2709  * Manage DMA'able memory.
2710  */
2711 static void
2712 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2713 {
2714         if (error)
2715                 return;
2716         *(bus_addr_t *) arg = segs[0].ds_addr;
2717 }
2718
2719 static int
2720 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2721         struct igb_dma_alloc *dma, int mapflags)
2722 {
2723         int error;
2724
2725         error = bus_dma_tag_create(NULL,                /* parent */
2726                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2727                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2728                                 BUS_SPACE_MAXADDR,      /* highaddr */
2729                                 NULL, NULL,             /* filter, filterarg */
2730                                 size,                   /* maxsize */
2731                                 1,                      /* nsegments */
2732                                 size,                   /* maxsegsize */
2733                                 0,                      /* flags */
2734                                 &dma->dma_tag);
2735         if (error) {
2736                 device_printf(adapter->dev,
2737                     "%s: bus_dma_tag_create failed: %d\n",
2738                     __func__, error);
2739                 goto fail_0;
2740         }
2741
2742         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2743             BUS_DMA_NOWAIT, &dma->dma_map);
2744         if (error) {
2745                 device_printf(adapter->dev,
2746                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2747                     __func__, (uintmax_t)size, error);
2748                 goto fail_2;
2749         }
2750
2751         dma->dma_paddr = 0;
2752         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2753             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2754         if (error || dma->dma_paddr == 0) {
2755                 device_printf(adapter->dev,
2756                     "%s: bus_dmamap_load failed: %d\n",
2757                     __func__, error);
2758                 goto fail_3;
2759         }
2760
2761         return (0);
2762
2763 fail_3:
2764         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2765 fail_2:
2766         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2767         bus_dma_tag_destroy(dma->dma_tag);
2768 fail_0:
2769         dma->dma_map = NULL;
2770         dma->dma_tag = NULL;
2771
2772         return (error);
2773 }
2774
2775 static void
2776 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2777 {
2778         if (dma->dma_tag == NULL)
2779                 return;
2780         if (dma->dma_map != NULL) {
2781                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2782                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2783                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2784                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2785                 dma->dma_map = NULL;
2786         }
2787         bus_dma_tag_destroy(dma->dma_tag);
2788         dma->dma_tag = NULL;
2789 }
2790
2791
2792 /*********************************************************************
2793  *
2794  *  Allocate memory for the transmit and receive rings, and then
2795  *  the descriptors associated with each, called only once at attach.
2796  *
2797  **********************************************************************/
2798 static int
2799 igb_allocate_queues(struct adapter *adapter)
2800 {
2801         device_t dev = adapter->dev;
2802         struct igb_queue        *que = NULL;
2803         struct tx_ring          *txr = NULL;
2804         struct rx_ring          *rxr = NULL;
2805         int rsize, tsize, error = E1000_SUCCESS;
2806         int txconf = 0, rxconf = 0;
2807
2808         /* First allocate the top level queue structs */
2809         if (!(adapter->queues =
2810             (struct igb_queue *) kmalloc(sizeof(struct igb_queue) *
2811             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2812                 device_printf(dev, "Unable to allocate queue memory\n");
2813                 error = ENOMEM;
2814                 goto fail;
2815         }
2816
2817         /* Next allocate the TX ring struct memory */
2818         if (!(adapter->tx_rings =
2819             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2820             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2821                 device_printf(dev, "Unable to allocate TX ring memory\n");
2822                 error = ENOMEM;
2823                 goto tx_fail;
2824         }
2825
2826         /* Now allocate the RX */
2827         if (!(adapter->rx_rings =
2828             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2829             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2830                 device_printf(dev, "Unable to allocate RX ring memory\n");
2831                 error = ENOMEM;
2832                 goto rx_fail;
2833         }
2834
2835         tsize = roundup2(adapter->num_tx_desc *
2836             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
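        /*
         * Each advanced TX descriptor is 16 bytes, so e.g. a ring of 1024
         * descriptors needs 16 KB of descriptor memory before the
         * IGB_DBA_ALIGN rounding; the RX ring below is sized the same way.
         */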
2837         /*
2838          * Now set up the TX queues, txconf is needed to handle the
2839          * possibility that things fail midcourse and we need to
2840          * undo memory gracefully
2841          */ 
2842         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2843                 /* Set up some basics */
2844                 txr = &adapter->tx_rings[i];
2845                 txr->adapter = adapter;
2846                 txr->me = i;
2847
2848                 /* Initialize the TX lock */
2849                 ksnprintf(txr->spin_name, sizeof(txr->spin_name), "%s:tx(%d)",
2850                     device_get_nameunit(dev), txr->me);
2851
2852                 IGB_TX_LOCK_INIT(txr);
2853
2854                 if (igb_dma_malloc(adapter, tsize,
2855                         &txr->txdma, BUS_DMA_NOWAIT)) {
2856                         device_printf(dev,
2857                             "Unable to allocate TX Descriptor memory\n");
2858                         error = ENOMEM;
2859                         goto err_tx_desc;
2860                 }
2861                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2862                 bzero((void *)txr->tx_base, tsize);
2863
2864                 /* Now allocate transmit buffers for the ring */
2865                 if (igb_allocate_transmit_buffers(txr)) {
2866                         device_printf(dev,
2867                             "Critical Failure setting up transmit buffers\n");
2868                         error = ENOMEM;
2869                         goto err_tx_desc;
2870                 }
2871 #if __FreeBSD_version >= 800000
2872                 /* Allocate a buf ring */
2873                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2874                     M_WAITOK, &txr->tx_mtx);
2875 #endif
2876         }
2877
2878         /*
2879          * Next the RX queues...
2880          */ 
2881         rsize = roundup2(adapter->num_rx_desc *
2882             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2883         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2884                 rxr = &adapter->rx_rings[i];
2885                 rxr->adapter = adapter;
2886                 rxr->me = i;
2887
2888                 /* Initialize the RX lock */
2889                 ksnprintf(rxr->spin_name, sizeof(rxr->spin_name), "%s:rx(%d)",
2890                     device_get_nameunit(dev), rxr->me);
2891
2892                 IGB_RX_LOCK_INIT(rxr);
2893
2894                 if (igb_dma_malloc(adapter, rsize,
2895                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2896                         device_printf(dev,
2897                             "Unable to allocate RX Descriptor memory\n");
2898                         error = ENOMEM;
2899                         goto err_rx_desc;
2900                 }
2901                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2902                 bzero((void *)rxr->rx_base, rsize);
2903
2904                 /* Allocate receive buffers for the ring */
2905                 if (igb_allocate_receive_buffers(rxr)) {
2906                         device_printf(dev,
2907                             "Critical Failure setting up receive buffers\n");
2908                         error = ENOMEM;
2909                         goto err_rx_desc;
2910                 }
2911         }
2912
2913         /*
2914         ** Finally set up the queue holding structs
2915         */
2916         for (int i = 0; i < adapter->num_queues; i++) {
2917                 que = &adapter->queues[i];
2918                 que->adapter = adapter;
2919                 que->txr = &adapter->tx_rings[i];
2920                 que->rxr = &adapter->rx_rings[i];
2921         }
2922
2923         return (0);
2924
2925 err_rx_desc:
2926         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2927                 igb_dma_free(adapter, &rxr->rxdma);
2928 err_tx_desc:
2929         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2930                 igb_dma_free(adapter, &txr->txdma);
2931         kfree(adapter->rx_rings, M_DEVBUF);
2932 rx_fail:
2933 #if __FreeBSD_version >= 800000
2934         buf_ring_free(txr->br, M_DEVBUF);
2935 #endif
2936         kfree(adapter->tx_rings, M_DEVBUF);
2937 tx_fail:
2938         kfree(adapter->queues, M_DEVBUF);
2939 fail:
2940         return (error);
2941 }
2942
2943 /*********************************************************************
2944  *
2945  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2946  *  the information needed to transmit a packet on the wire. This is
2947  *  called only once at attach, setup is done every reset.
2948  *
2949  **********************************************************************/
2950 static int
2951 igb_allocate_transmit_buffers(struct tx_ring *txr)
2952 {
2953         struct adapter *adapter = txr->adapter;
2954         device_t dev = adapter->dev;
2955         struct igb_tx_buffer *txbuf;
2956         int error, i;
2957
2958         /*
2959          * Setup DMA descriptor areas.
2960          */
2961         if ((error = bus_dma_tag_create(NULL,
2962                                1, 0,                    /* alignment, bounds */
2963                                BUS_SPACE_MAXADDR,       /* lowaddr */
2964                                BUS_SPACE_MAXADDR,       /* highaddr */
2965                                NULL, NULL,              /* filter, filterarg */
2966                                IGB_TSO_SIZE,            /* maxsize */
2967                                IGB_MAX_SCATTER,         /* nsegments */
2968                                PAGE_SIZE,               /* maxsegsize */
2969                                0,                       /* flags */
2970                                &txr->txtag))) {
2971                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2972                 goto fail;
2973         }
2974
2975         if (!(txr->tx_buffers =
2976             (struct igb_tx_buffer *) kmalloc(sizeof(struct igb_tx_buffer) *
2977             adapter->num_tx_desc, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2978                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2979                 error = ENOMEM;
2980                 goto fail;
2981         }
2982
2983         /* Create the descriptor buffer dma maps */
2984         txbuf = txr->tx_buffers;
2985         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2986                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2987                 if (error != 0) {
2988                         device_printf(dev, "Unable to create TX DMA map\n");
2989                         goto fail;
2990                 }
2991         }
2992
2993         return 0;
2994 fail:
2995         /* Free everything; this handles the case where we failed partway through */
2996         igb_free_transmit_structures(adapter);
2997         return (error);
2998 }
2999
3000 /*********************************************************************
3001  *
3002  *  Initialize a transmit ring.
3003  *
3004  **********************************************************************/
3005 static void
3006 igb_setup_transmit_ring(struct tx_ring *txr)
3007 {
3008         struct adapter *adapter = txr->adapter;
3009         struct igb_tx_buffer *txbuf;
3010         int i;
3011
3012         /* Clear the old descriptor contents */
3013         IGB_TX_LOCK(txr);
3014         bzero((void *)txr->tx_base,
3015               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3016         /* Reset indices */
3017         txr->next_avail_desc = 0;
3018         txr->next_to_clean = 0;
3019
3020         /* Free any existing tx buffers. */
3021         txbuf = txr->tx_buffers;
3022         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3023                 if (txbuf->m_head != NULL) {
3024                         bus_dmamap_sync(txr->txtag, txbuf->map,
3025                             BUS_DMASYNC_POSTWRITE);
3026                         bus_dmamap_unload(txr->txtag, txbuf->map);
3027                         m_freem(txbuf->m_head);
3028                         txbuf->m_head = NULL;
3029                 }
3030                 /* clear the watch index */
3031                 txbuf->next_eop = -1;
3032         }
3033
3034         /* Set number of descriptors available */
3035         txr->tx_avail = adapter->num_tx_desc;
3036
3037         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3038             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3039         IGB_TX_UNLOCK(txr);
3040 }
3041
3042 /*********************************************************************
3043  *
3044  *  Initialize all transmit rings.
3045  *
3046  **********************************************************************/
3047 static void
3048 igb_setup_transmit_structures(struct adapter *adapter)
3049 {
3050         struct tx_ring *txr = adapter->tx_rings;
3051
3052         for (int i = 0; i < adapter->num_queues; i++, txr++)
3053                 igb_setup_transmit_ring(txr);
3054
3055         return;
3056 }
3057
3058 /*********************************************************************
3059  *
3060  *  Enable transmit unit.
3061  *
3062  **********************************************************************/
3063 static void
3064 igb_initialize_transmit_units(struct adapter *adapter)
3065 {
3066         struct tx_ring  *txr = adapter->tx_rings;
3067         struct e1000_hw *hw = &adapter->hw;
3068         u32             tctl, txdctl;
3069
3070         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3071
3072         /* Setup the Tx Descriptor Rings */
3073         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3074                 u64 bus_addr = txr->txdma.dma_paddr;
3075
3076                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3077                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3078                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3079                     (uint32_t)(bus_addr >> 32));
3080                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3081                     (uint32_t)bus_addr);
3082
3083                 /* Setup the HW Tx Head and Tail descriptor pointers */
3084                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3085                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3086
3087                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3088                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3089                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3090
3091                 txr->watchdog_check = FALSE;
3092
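                /*
                 * TXDCTL packs the prefetch, host and write-back
                 * thresholds into separate byte positions (shifted by
                 * 0, 8 and 16) before the queue is enabled.
                 */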
3093                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3094                 txdctl |= IGB_TX_PTHRESH;
3095                 txdctl |= IGB_TX_HTHRESH << 8;
3096                 txdctl |= IGB_TX_WTHRESH << 16;
3097                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3098                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3099         }
3100
3101         /* Program the Transmit Control Register */
3102         tctl = E1000_READ_REG(hw, E1000_TCTL);
3103         tctl &= ~E1000_TCTL_CT;
3104         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3105                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3106
3107         e1000_config_collision_dist(hw);
3108
3109         /* This write will effectively turn on the transmit unit. */
3110         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3111 }
3112
3113 /*********************************************************************
3114  *
3115  *  Free all transmit rings.
3116  *
3117  **********************************************************************/
3118 static void
3119 igb_free_transmit_structures(struct adapter *adapter)
3120 {
3121         struct tx_ring *txr = adapter->tx_rings;
3122
3123         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3124                 IGB_TX_LOCK(txr);
3125                 igb_free_transmit_buffers(txr);
3126                 igb_dma_free(adapter, &txr->txdma);
3127                 IGB_TX_UNLOCK(txr);
3128                 IGB_TX_LOCK_DESTROY(txr);
3129         }
3130         kfree(adapter->tx_rings, M_DEVBUF);
3131 }
3132
3133 /*********************************************************************
3134  *
3135  *  Free transmit ring related data structures.
3136  *
3137  **********************************************************************/
3138 static void
3139 igb_free_transmit_buffers(struct tx_ring *txr)
3140 {
3141         struct adapter *adapter = txr->adapter;
3142         struct igb_tx_buffer *tx_buffer;
3143         int             i;
3144
3145         INIT_DEBUGOUT("igb_free_transmit_buffers: begin");
3146
3147         if (txr->tx_buffers == NULL)
3148                 return;
3149
3150         tx_buffer = txr->tx_buffers;
3151         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3152                 if (tx_buffer->m_head != NULL) {
3153                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3154                             BUS_DMASYNC_POSTWRITE);
3155                         bus_dmamap_unload(txr->txtag,
3156                             tx_buffer->map);
3157                         m_freem(tx_buffer->m_head);
3158                         tx_buffer->m_head = NULL;
3159                         if (tx_buffer->map != NULL) {
3160                                 bus_dmamap_destroy(txr->txtag,
3161                                     tx_buffer->map);
3162                                 tx_buffer->map = NULL;
3163                         }
3164                 } else if (tx_buffer->map != NULL) {
3165                         bus_dmamap_unload(txr->txtag,
3166                             tx_buffer->map);
3167                         bus_dmamap_destroy(txr->txtag,
3168                             tx_buffer->map);
3169                         tx_buffer->map = NULL;
3170                 }
3171         }
3172 #if __FreeBSD_version >= 800000
3173         if (txr->br != NULL)
3174                 buf_ring_free(txr->br, M_DEVBUF);
3175 #endif
3176         if (txr->tx_buffers != NULL) {
3177                 kfree(txr->tx_buffers, M_DEVBUF);
3178                 txr->tx_buffers = NULL;
3179         }
3180         if (txr->txtag != NULL) {
3181                 bus_dma_tag_destroy(txr->txtag);
3182                 txr->txtag = NULL;
3183         }
3184         return;
3185 }
3186
3187 /**********************************************************************
3188  *
3189  *  Setup work for hardware segmentation offload (TSO)
3190  *
3191  **********************************************************************/
3192 #ifdef NET_TSO 
3193 static boolean_t
3194 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3195 {
3196         struct adapter *adapter = txr->adapter;
3197         struct e1000_adv_tx_context_desc *TXD;
3198         struct igb_tx_buffer        *tx_buffer;
3199         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3200         u32 mss_l4len_idx = 0;
3201         u16 vtag = 0;
3202         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3203         struct ether_vlan_header *eh;
3204         struct ip *ip;
3205         struct tcphdr *th;
3206
3207
3208         /*
3209          * Determine where frame payload starts.
3210          * Jump over vlan headers if already present
3211          */
3212         eh = mtod(mp, struct ether_vlan_header *);
3213         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3214                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3215         else
3216                 ehdrlen = ETHER_HDR_LEN;
3217
3218         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3219         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3220                 return FALSE;
3221
3222         /* Only supports IPV4 for now */
3223         ctxd = txr->next_avail_desc;
3224         tx_buffer = &txr->tx_buffers[ctxd];
3225         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3226
3227         ip = (struct ip *)(mp->m_data + ehdrlen);
3228         if (ip->ip_p != IPPROTO_TCP)
3229                 return FALSE;   /* 0 */
3230         ip->ip_sum = 0;
3231         ip_hlen = ip->ip_hl << 2;
3232         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
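        /*
         * Pre-seed the TCP checksum with the pseudo-header sum (without
         * the length); the hardware completes it for every segment it
         * generates during TSO.
         */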
3233         th->th_sum = in_pseudo(ip->ip_src.s_addr,
3234             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3235         tcp_hlen = th->th_off << 2;
3236         /*
3237          * Calculate the header length; it is used in
3238          * the transmit descriptor in igb_xmit.
3239          */
3240         *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3241
3242         /* VLAN MACLEN IPLEN */
3243         if (mp->m_flags & M_VLANTAG) {
3244                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3245                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3246         }
3247
3248         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3249         vlan_macip_lens |= ip_hlen;
3250         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3251
3252         /* ADV DTYPE TUCMD */
3253         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3254         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3255         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3256         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3257
3258         /* MSS L4LEN IDX */
3259         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3260         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3261         /* 82575 needs the queue index added */
3262         if (adapter->hw.mac.type == e1000_82575)
3263                 mss_l4len_idx |= txr->me << 4;
3264         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3265
3266         TXD->seqnum_seed = htole32(0);
3267         tx_buffer->m_head = NULL;
3268         tx_buffer->next_eop = -1;
3269
3270         if (++ctxd == adapter->num_tx_desc)
3271                 ctxd = 0;
3272
3273         txr->tx_avail--;
3274         txr->next_avail_desc = ctxd;
3275         return TRUE;
3276 }
3277 #endif
3278
3279 /*********************************************************************
3280  *
3281  *  Context Descriptor setup for VLAN or CSUM
3282  *
3283  **********************************************************************/
3284
3285 static bool
3286 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3287 {
3288         struct adapter *adapter = txr->adapter;
3289         struct e1000_adv_tx_context_desc *TXD;
3290         struct igb_tx_buffer        *tx_buffer;
3291         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3292         struct ether_vlan_header *eh;
3293         struct ip *ip = NULL;
3294         struct ip6_hdr *ip6;
3295         int  ehdrlen, ctxd, ip_hlen = 0;
3296         u16     etype, vtag = 0;
3297         u8      ipproto = 0;
3298         bool    offload = TRUE;
3299
3300         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3301                 offload = FALSE;
3302
3303         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3304         ctxd = txr->next_avail_desc;
3305         tx_buffer = &txr->tx_buffers[ctxd];
3306         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
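        /*
         * The context descriptor consumes one slot in the TX ring; the
         * offload parameters it carries apply to the data descriptors
         * queued after it.
         */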
3307
3308         /*
3309         ** In advanced descriptors the vlan tag must be placed into
3310         ** the context descriptor, so even without checksum offload
3311         ** we may need a context descriptor just for that.
3312         */
3313         if (mp->m_flags & M_VLANTAG) {
3314                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3315                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3316         } else if (offload == FALSE)
3317                 return FALSE;
3318
3319         /*
3320          * Determine where frame payload starts.
3321          * Jump over vlan headers if already present,
3322          * helpful for QinQ too.
3323          */
3324         eh = mtod(mp, struct ether_vlan_header *);
3325         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3326                 etype = ntohs(eh->evl_proto);
3327                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3328         } else {
3329                 etype = ntohs(eh->evl_encap_proto);
3330                 ehdrlen = ETHER_HDR_LEN;
3331         }
3332
3333         /* Set the ether header length */
3334         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3335
3336         switch (etype) {
3337                 case ETHERTYPE_IP:
3338                         ip = (struct ip *)(mp->m_data + ehdrlen);
3339                         ip_hlen = ip->ip_hl << 2;
3340                         if (mp->m_len < ehdrlen + ip_hlen) {
3341                                 offload = FALSE;
3342                                 break;
3343                         }
3344                         ipproto = ip->ip_p;
3345                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3346                         break;
3347                 case ETHERTYPE_IPV6:
3348                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3349                         ip_hlen = sizeof(struct ip6_hdr);
3350                         if (mp->m_len < ehdrlen + ip_hlen)
3351                                 return (FALSE);
3352                         ipproto = ip6->ip6_nxt;
3353                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3354                         break;
3355                 default:
3356                         offload = FALSE;
3357                         break;
3358         }
3359
3360         vlan_macip_lens |= ip_hlen;
3361         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3362
3363         switch (ipproto) {
3364                 case IPPROTO_TCP:
3365                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3366                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3367                         break;
3368                 case IPPROTO_UDP:
3369                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3370                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3371                         break;
3372 #if __FreeBSD_version >= 800000
3373                 case IPPROTO_SCTP:
3374                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3375                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3376                         break;
3377 #endif
3378                 default:
3379                         offload = FALSE;
3380                         break;
3381         }
3382
3383         /* 82575 needs the queue index added */
3384         if (adapter->hw.mac.type == e1000_82575)
3385                 mss_l4len_idx = txr->me << 4;
3386
3387         /* Now copy bits into descriptor */
3388         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3389         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3390         TXD->seqnum_seed = htole32(0);
3391         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3392
3393         tx_buffer->m_head = NULL;
3394         tx_buffer->next_eop = -1;
3395
3396         /* We've consumed the first desc, adjust counters */
3397         if (++ctxd == adapter->num_tx_desc)
3398                 ctxd = 0;
3399         txr->next_avail_desc = ctxd;
3400         --txr->tx_avail;
3401
3402         return (offload);
3403 }
3404
3405
3406 /**********************************************************************
3407  *
3408  *  Examine each tx_buffer in the used queue. If the hardware is done
3409  *  processing the packet then free associated resources. The
3410  *  tx_buffer is put back on the free queue.
3411  *
3412  *  TRUE return means there's work in the ring to clean, FALSE it's empty.
3413  **********************************************************************/
3414 static bool
3415 igb_txeof(struct tx_ring *txr)
3416 {
3417         struct adapter  *adapter = txr->adapter;
3418         int first, last, done;
3419         struct igb_tx_buffer *tx_buffer;
3420         struct e1000_tx_desc   *tx_desc, *eop_desc;
3421         struct ifnet   *ifp = adapter->ifp;
3422
3423         IGB_TX_LOCK_ASSERT(txr);
3424
3425         if (txr->tx_avail == adapter->num_tx_desc)
3426                 return FALSE;
3427
3428         first = txr->next_to_clean;
3429         tx_desc = &txr->tx_base[first];
3430         tx_buffer = &txr->tx_buffers[first];
3431         last = tx_buffer->next_eop;
3432         eop_desc = &txr->tx_base[last];
3433
3434         /*
3435          * Get the index of the first descriptor AFTER
3436          * the EOP of the first packet, so the inner
3437          * while loop can use a simple comparison.
3439          */
3440         if (++last == adapter->num_tx_desc)
3441                 last = 0;
3442         done = last;
3443
3444         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3445             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3446
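        /*
         * The DD (descriptor done) bit is set by the hardware in the EOP
         * descriptor's status once the whole packet has gone out; only
         * then is it safe to reclaim every descriptor of that packet.
         */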
3447         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3448                 /* We clean the range of the packet */
3449                 while (first != done) {
3450                         tx_desc->upper.data = 0;
3451                         tx_desc->lower.data = 0;
3452                         tx_desc->buffer_addr = 0;
3453                         ++txr->tx_avail;
3454
3455                         if (tx_buffer->m_head) {
3456                                 txr->bytes +=
3457                                     tx_buffer->m_head->m_pkthdr.len;
3458                                 bus_dmamap_sync(txr->txtag,
3459                                     tx_buffer->map,
3460                                     BUS_DMASYNC_POSTWRITE);
3461                                 bus_dmamap_unload(txr->txtag,
3462                                     tx_buffer->map);
3463
3464                                 m_freem(tx_buffer->m_head);
3465                                 tx_buffer->m_head = NULL;
3466                         }
3467                         tx_buffer->next_eop = -1;
3468                         txr->watchdog_time = ticks;
3469
3470                         if (++first == adapter->num_tx_desc)
3471                                 first = 0;
3472
3473                         tx_buffer = &txr->tx_buffers[first];
3474                         tx_desc = &txr->tx_base[first];
3475                 }
3476                 ++txr->packets;
3477                 ++ifp->if_opackets;
3478                 /* See if we can continue to the next packet */
3479                 last = tx_buffer->next_eop;
3480                 if (last != -1) {
3481                         eop_desc = &txr->tx_base[last];
3482                         /* Get new done point */
3483                         if (++last == adapter->num_tx_desc) last = 0;
3484                         done = last;
3485                 } else
3486                         break;
3487         }
3488         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3489             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3490
3491         txr->next_to_clean = first;
3492
3493         /*
3494          * If we have enough room, clear IFF_OACTIVE
3495          * to tell the stack that it is OK to send packets.
3496          */
3497         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {                
3498                 ifp->if_flags &= ~IFF_OACTIVE;
3499                 /* All clean, turn off the watchdog */
3500                 if (txr->tx_avail == adapter->num_tx_desc) {
3501                         txr->watchdog_check = FALSE;
3502                         return FALSE;
3503                 }
3504         }
3505
3506         return (TRUE);
3507 }
3508
3509
3510 /*********************************************************************
3511  *
3512  *  Setup descriptor buffer(s) from system mbuf buffer pools.
3513  *              i - designates the ring index
3514  *              clean - tells the function whether to update
3515  *                      the header, the packet buffer, or both.
3516  *
3517  **********************************************************************/
3518 static int
3519 igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3520 {
3521         struct adapter          *adapter = rxr->adapter;
3522         struct igb_rx_buf       *rxbuf;
3523         struct mbuf             *mh, *mp;
3524         bus_dma_segment_t       hseg[1];
3525         bus_dma_segment_t       pseg[1];
3526         bus_dmamap_t            map;
3527         int                     nsegs, error;
3528         int                     mbflags;
3529
3530         /*
3531          * Init-time loads are allowed to use a blocking mbuf allocation,
3532          * otherwise the sheer number of mbufs allocated can lead to
3533          * failures.
3534          */
3535         mbflags = (clean & IGB_CLEAN_INITIAL) ? MB_WAIT : MB_DONTWAIT;
3536
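        /*
         * New mbufs are loaded into the ring's spare DMA maps first and
         * only swapped with the buffer's maps after the load succeeds,
         * so a failed allocation leaves the old buffer intact.
         */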
3537         rxbuf = &rxr->rx_buffers[i];
3538         mh = mp = NULL;
3539         if ((clean & IGB_CLEAN_HEADER) != 0) {
3540                 mh = m_gethdr(mbflags, MT_DATA);
3541                 if (mh == NULL) {
3542                         adapter->mbuf_header_failed++;          
3543                         return (ENOBUFS);
3544                 }
3545                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3546                 /*
3547                  * Because IGB_HDR_BUF is smaller than MHLEN and the
3548                  * controller is configured to split headers, the mbuf
3549                  * can be aligned on an ETHER_ALIGN boundary.
3550                  */
3551                 m_adj(mh, ETHER_ALIGN);
3552                 error = bus_dmamap_load_mbuf_segment(rxr->rx_htag,
3553                     rxr->rx_hspare_map, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3554                 if (error != 0) {
3555                         m_freem(mh);
3556                         return (error);
3557                 }
3558                 mh->m_flags &= ~M_PKTHDR;
3559         }
3560         if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3561                 mp = m_getl(adapter->rx_mbuf_sz, mbflags, MT_DATA,
3562                             M_PKTHDR, NULL);
3563 #if 0
3564                 mp = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR,
3565                     adapter->rx_mbuf_sz);
3566 #endif
3567                 if (mp == NULL) {
3568                         if (mh != NULL) {
3569                                 adapter->mbuf_packet_failed++;          
3570                                 bus_dmamap_unload(rxr->rx_htag,
3571                                     rxbuf->head_map);
3572                                 mh->m_flags |= M_PKTHDR;
3573                                 m_freem(mh);
3574                         }
3575                         return (ENOBUFS);
3576                 }
3577                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3578                 error = bus_dmamap_load_mbuf_segment(rxr->rx_ptag,
3579                     rxr->rx_pspare_map, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3580                 if (error != 0) {
3581                         if (mh != NULL) {
3582                                 bus_dmamap_unload(rxr->rx_htag,
3583                                     rxbuf->head_map);
3584                                 mh->m_flags |= M_PKTHDR;
3585                                 m_freem(mh);
3586                         }
3587                         m_freem(mp);
3588                         return (error);
3589                 }
3590                 mp->m_flags &= ~M_PKTHDR;
3591         }
3592
3593         /* Loading new DMA maps complete, unload maps for received buffers. */
3594         if ((clean & IGB_CLEAN_HEADER) != 0 && rxbuf->m_head != NULL) {
3595                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3596                     BUS_DMASYNC_POSTREAD);
3597                 bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3598         }
3599         if ((clean & IGB_CLEAN_PAYLOAD) != 0 && rxbuf->m_pack != NULL) {
3600                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3601                     BUS_DMASYNC_POSTREAD);
3602                 bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3603         }
3604
3605         /* Reflect loaded dmamaps. */
3606         if ((clean & IGB_CLEAN_HEADER) != 0) {
3607                 map = rxbuf->head_map;
3608                 rxbuf->head_map = rxr->rx_hspare_map;
3609                 rxr->rx_hspare_map = map;
3610                 rxbuf->m_head = mh;
3611                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3612                     BUS_DMASYNC_PREREAD);
3613                 rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
3614         }
3615         if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3616                 map = rxbuf->pack_map;
3617                 rxbuf->pack_map = rxr->rx_pspare_map;
3618                 rxr->rx_pspare_map = map;
3619                 rxbuf->m_pack = mp;
3620                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3621                     BUS_DMASYNC_PREREAD);
3622                 rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
3623         }
3624
3625         return (0);
3626 }
3627
3628 /*********************************************************************
3629  *
3630  *  Allocate memory for rx_buffer structures. Since we use one
3631  *  rx_buffer per received packet, the maximum number of rx_buffer's
3632  *  that we'll need is equal to the number of receive descriptors
3633  *  that we've allocated.
3634  *
3635  **********************************************************************/
3636 static int
3637 igb_allocate_receive_buffers(struct rx_ring *rxr)
3638 {
3639         struct  adapter         *adapter = rxr->adapter;
3640         device_t                dev = adapter->dev;
3641         struct igb_rx_buf       *rxbuf;
3642         int                     i, bsize, error;
3643
3644         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3645         if (!(rxr->rx_buffers =
3646             (struct igb_rx_buf *) kmalloc(bsize,
3647             M_DEVBUF, M_INTWAIT | M_ZERO))) {
3648                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3649                 error = ENOMEM;
3650                 goto fail;
3651         }
3652
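        /*
         * Two DMA tags per ring: a small one (MSIZE) for the header
         * mbufs and a page-sized one (MJUMPAGESIZE) for the payload
         * clusters used when header split is enabled.
         */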
3653         if ((error = bus_dma_tag_create(NULL,
3654                                    1, 0,                /* alignment, bounds */
3655                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3656                                    BUS_SPACE_MAXADDR,   /* highaddr */
3657                                    NULL, NULL,          /* filter, filterarg */
3658                                    MSIZE,               /* maxsize */
3659                                    1,                   /* nsegments */
3660                                    MSIZE,               /* maxsegsize */
3661                                    0,                   /* flags */
3662                                    &rxr->rx_htag))) {
3663                 device_printf(dev, "Unable to create RX DMA tag\n");
3664                 goto fail;
3665         }
3666
3667         if ((error = bus_dma_tag_create(NULL,
3668                                    1, 0,                /* alignment, bounds */
3669                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3670                                    BUS_SPACE_MAXADDR,   /* highaddr */
3671                                    NULL, NULL,          /* filter, filterarg */
3672                                    MJUMPAGESIZE,        /* maxsize */
3673                                    1,                   /* nsegments */
3674                                    MJUMPAGESIZE,        /* maxsegsize */
3675                                    0,                   /* flags */
3676                                    &rxr->rx_ptag))) {
3677                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3678                 goto fail;
3679         }
3680
3681         /* Create the spare maps (used by getbuf) */
3682         error = bus_dmamap_create(rxr->rx_htag, BUS_DMA_NOWAIT,
3683              &rxr->rx_hspare_map);
3684         if (error) {
3685                 device_printf(dev,
3686                     "%s: bus_dmamap_create header spare failed: %d\n",
3687                     __func__, error);
3688                 goto fail;
3689         }
3690         error = bus_dmamap_create(rxr->rx_ptag, BUS_DMA_NOWAIT,
3691              &rxr->rx_pspare_map);
3692         if (error) {
3693                 device_printf(dev,
3694                     "%s: bus_dmamap_create packet spare failed: %d\n",
3695                     __func__, error);
3696                 goto fail;
3697         }
3698
3699         for (i = 0; i < adapter->num_rx_desc; i++) {
3700                 rxbuf = &rxr->rx_buffers[i];
3701                 error = bus_dmamap_create(rxr->rx_htag,
3702                     BUS_DMA_NOWAIT, &rxbuf->head_map);
3703                 if (error) {
3704                         device_printf(dev,
3705                             "Unable to create RX head DMA maps\n");
3706                         goto fail;
3707                 }
3708                 error = bus_dmamap_create(rxr->rx_ptag,
3709                     BUS_DMA_NOWAIT, &rxbuf->pack_map);
3710                 if (error) {
3711                         device_printf(dev,
3712                             "Unable to create RX packet DMA maps\n");
3713                         goto fail;
3714                 }
3715         }
3716
3717         return (0);
3718
3719 fail:
3720         /* Frees all, but can handle partial completion */
3721         igb_free_receive_structures(adapter);
3722         return (error);
3723 }
3724
3725
3726 static void
3727 igb_free_receive_ring(struct rx_ring *rxr)
3728 {
3729         struct  adapter         *adapter;
3730         struct igb_rx_buf       *rxbuf;
3731         int i;
3732
3733         adapter = rxr->adapter;
3734         for (i = 0; i < adapter->num_rx_desc; i++) {
3735                 rxbuf = &rxr->rx_buffers[i];
3736                 if (rxbuf->m_head != NULL) {
3737                         bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3738                             BUS_DMASYNC_POSTREAD);
3739                         bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3740                         rxbuf->m_head->m_flags |= M_PKTHDR;
3741                         m_freem(rxbuf->m_head);
3742                 }
3743                 if (rxbuf->m_pack != NULL) {
3744                         bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3745                             BUS_DMASYNC_POSTREAD);
3746                         bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3747                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3748                         m_freem(rxbuf->m_pack);
3749                 }
3750                 rxbuf->m_head = NULL;
3751                 rxbuf->m_pack = NULL;
3752         }
3753 }
3754
3755
3756 /*********************************************************************
3757  *
3758  *  Initialize a receive ring and its buffers.
3759  *
3760  **********************************************************************/
3761 static int
3762 igb_setup_receive_ring(struct rx_ring *rxr)
3763 {
3764         struct  adapter         *adapter;
3765         struct  ifnet           *ifp;
3766         device_t                dev;
3767 #ifdef NET_LRO 
3768         struct lro_ctrl         *lro = &rxr->lro;
3769 #endif
3770         int                     j, rsize, error = 0;
3771
3772         adapter = rxr->adapter;
3773         dev = adapter->dev;
3774         ifp = adapter->ifp;
3775
3776         /* Clear the ring contents */
3777         IGB_RX_LOCK(rxr);
3778         rsize = roundup2(adapter->num_rx_desc *
3779             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3780         bzero((void *)rxr->rx_base, rsize);
3781
3782         /*
3783         ** Free current RX buffer structures and their mbufs
3784         */
3785         igb_free_receive_ring(rxr);
3786
3787         /* Now replenish the ring mbufs */
3788         for (j = 0; j < adapter->num_rx_desc; j++) {
3789                 error = igb_get_buf(rxr, j, IGB_CLEAN_BOTH | IGB_CLEAN_INITIAL);
3790                 if (error)
3791                         goto fail;
3792         }
3793
3794         /* Setup our descriptor indices */
3795         rxr->next_to_check = 0;
3796         rxr->last_cleaned = 0;
3797         rxr->lro_enabled = FALSE;
3798
3799         if (igb_header_split)
3800                 rxr->hdr_split = TRUE;
3801 #ifdef NET_LRO
3802         else
3803                 ifp->if_capabilities &= ~IFCAP_LRO;
3804 #endif
3805
3806         rxr->fmp = NULL;
3807         rxr->lmp = NULL;
3808         rxr->discard = FALSE;
3809
3810         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3811             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3812
3813         /*
3814         ** Now set up the LRO interface; we also only do header
3815         ** split when LRO is enabled, since header split is often
3816         ** undesirable otherwise.
3818         */
3819 #ifdef NET_LRO
3820         if (ifp->if_capenable & IFCAP_LRO) {
3821                 int err = tcp_lro_init(lro);
3822                 if (err) {
3823                         device_printf(dev, "LRO Initialization failed!\n");
3824                         goto fail;
3825                 }
3826                 INIT_DEBUGOUT("RX LRO Initialized\n");
3827                 rxr->lro_enabled = TRUE;
3828                 lro->ifp = adapter->ifp;
3829         }
3830 #endif
3831
3832         IGB_RX_UNLOCK(rxr);
3833         return (0);
3834
3835 fail:
3836         igb_free_receive_ring(rxr);
3837         IGB_RX_UNLOCK(rxr);
3838         return (error);
3839 }
3840
3841 /*********************************************************************
3842  *
3843  *  Initialize all receive rings.
3844  *
3845  **********************************************************************/
3846 static int
3847 igb_setup_receive_structures(struct adapter *adapter)
3848 {
3849         struct rx_ring *rxr = adapter->rx_rings;
3850         int i, j;
3851
3852         for (i = 0; i < adapter->num_queues; i++, rxr++)
3853                 if (igb_setup_receive_ring(rxr))
3854                         goto fail;
3855
3856         return (0);
3857 fail:
3858         /*
3859          * Free RX buffers allocated so far, we will only handle
3860          * the rings that completed, the failing case will have
3861          * cleaned up for itself. 'i' is the index of the ring that
3862          * failed, so only the 'i' rings before it are freed here.
3863          */
3864         rxr = adapter->rx_rings;
3865         for (j = 0; j < i; j++, rxr++)
3866                 igb_free_receive_ring(rxr);
3869
3870         return (ENOBUFS);
3871 }
3872
3873 /*********************************************************************
3874  *
3875  *  Enable receive unit.
3876  *
3877  **********************************************************************/
3878 static void
3879 igb_initialize_receive_units(struct adapter *adapter)
3880 {
3881         struct rx_ring  *rxr = adapter->rx_rings;
3882         struct ifnet    *ifp = adapter->ifp;
3883         struct e1000_hw *hw = &adapter->hw;
3884         u32             rctl, rxcsum, psize, srrctl = 0;
3885
3886         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3887
3888         /*
3889          * Make sure receives are disabled while setting
3890          * up the descriptor ring
3891          */
3892         rctl = E1000_READ_REG(hw, E1000_RCTL);
3893         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3894
3895         /*
3896         ** Set up for header split
3897         */
3898         if (rxr->hdr_split) {
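                /*
                 * With header split the controller DMAs the protocol
                 * headers into the small header buffer and the payload
                 * into the packet buffer.
                 */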
3899                 /* Use a standard mbuf for the header */
3900                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3901                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3902         } else
3903                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3904
3905         /*
3906         ** Set up for jumbo frames
3907         */
3908         if (ifp->if_mtu > ETHERMTU) {
3909                 rctl |= E1000_RCTL_LPE;
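                /* SRRCTL's packet buffer size field is in 1 KB units */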
3910                 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3911                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3912
3913                 /* Set maximum packet len */
3914                 psize = adapter->max_frame_size;
3915
3916                 /* Prepare for VLAN */
3917                 psize += VLAN_TAG_SIZE;
3918                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3919         } else {
3920                 rctl &= ~E1000_RCTL_LPE;
3921                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3922                 rctl |= E1000_RCTL_SZ_2048;
3923         }
3924
3925         /* Setup the Base and Length of the Rx Descriptor Rings */
3926         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3927                 u64 bus_addr = rxr->rxdma.dma_paddr;
3928                 u32 rxdctl;
3929
3930                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3931                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3932                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3933                     (uint32_t)(bus_addr >> 32));
3934                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3935                     (uint32_t)bus_addr);
3936                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3937                 /* Enable this Queue */
3938                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3939                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3940                 rxdctl &= 0xFFF00000;
3941                 rxdctl |= IGB_RX_PTHRESH;
3942                 rxdctl |= IGB_RX_HTHRESH << 8;
3943                 rxdctl |= IGB_RX_WTHRESH << 16;
3944                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3945         }
3946
3947         /*
3948         ** Setup for RX MultiQueue
3949         */
3950         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3951         if (adapter->num_queues > 1) {
3952                 u32 random[10], mrqc, shift = 0;
3953                 union igb_reta {
3954                         u32 dword;
3955                         u8  bytes[4];
3956                 } reta;
3957
3958                 karc4rand(&random, sizeof(random));
3959                 if (adapter->hw.mac.type == e1000_82575)
3960                         shift = 6;
3961                 /* Warning FM follows */
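                /*
                 * Fill the 128-entry RSS redirection table: received
                 * flows hash into one of 128 buckets, each pointing at
                 * a queue; assign the queues round-robin, writing the
                 * table one 32-bit (four-entry) register at a time.
                 */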
3962                 for (int i = 0; i < 128; i++) {
3963                         reta.bytes[i & 3] =
3964                             (i % adapter->num_queues) << shift;
3965                         if ((i & 3) == 3)
3966                                 E1000_WRITE_REG(hw,
3967                                     E1000_RETA(i >> 2), reta.dword);
3968                 }
3969                 /* Now fill in hash table */
3970                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3971                 for (int i = 0; i < 10; i++)
3972                         E1000_WRITE_REG_ARRAY(hw,
3973                             E1000_RSSRK(0), i, random[i]);
3974
3975                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3976                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
3977                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3978                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
3979                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3980                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
3981                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3982                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3983
3984                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
3985
3986                 /*
3987                 ** NOTE: Receive full-packet checksum offload is
3988                 ** mutually exclusive with multiqueue; TCP/IP header
3989                 ** checksum offload is unaffected and still works.
3991                 */
3992                 rxcsum |= E1000_RXCSUM_PCSD;
3993 #if __FreeBSD_version >= 800000
3994                 /* For SCTP Offload */
3995                 if ((hw->mac.type == e1000_82576)
3996                     && (ifp->if_capenable & IFCAP_RXCSUM))
3997                         rxcsum |= E1000_RXCSUM_CRCOFL;
3998 #endif
3999         } else {
4000                 /* Non RSS setup */
4001                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4002                         rxcsum |= E1000_RXCSUM_IPPCSE;
4003 #if __FreeBSD_version >= 800000
4004                         if (adapter->hw.mac.type == e1000_82576)
4005                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4006 #endif
4007                 } else
4008                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4009         }
4010         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4011
4012         /* Setup the Receive Control Register */
4013         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4014         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4015                    E1000_RCTL_RDMTS_HALF |
4016                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4017         /* Strip CRC bytes. */
4018         rctl |= E1000_RCTL_SECRC;
4019         /* Make sure VLAN Filters are off */
4020         rctl &= ~E1000_RCTL_VFE;
4021         /* Don't store bad packets */
4022         rctl &= ~E1000_RCTL_SBP;
4023
4024         /* Enable Receives */
4025         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4026
4027         /*
4028          * Setup the HW Rx Head and Tail Descriptor Pointers
4029          *   - needs to be after enable
4030          */
4031         for (int i = 0; i < adapter->num_queues; i++) {
4032                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4033                 E1000_WRITE_REG(hw, E1000_RDT(i),
4034                      adapter->num_rx_desc - 1);
4035         }
4036         return;
4037 }
4038
4039 /*********************************************************************
4040  *
4041  *  Free receive rings.
4042  *
4043  **********************************************************************/
4044 static void
4045 igb_free_receive_structures(struct adapter *adapter)
4046 {
4047         struct rx_ring *rxr = adapter->rx_rings;
4048
4049         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4050 #ifdef NET_LRO 
4051                 struct lro_ctrl *lro = &rxr->lro;
4052 #endif
4053                 IGB_RX_LOCK(rxr);
4054                 igb_free_receive_buffers(rxr);
4055 #ifdef NET_LRO
4056                 tcp_lro_free(lro);
4057 #endif
4058                 igb_dma_free(adapter, &rxr->rxdma);
4059                 IGB_RX_UNLOCK(rxr);
4060                 IGB_RX_LOCK_DESTROY(rxr);
4061         }
4062
4063         kfree(adapter->rx_rings, M_DEVBUF);
4064 }
4065
4066 /*********************************************************************
4067  *
4068  *  Free receive ring data structures.
4069  *
4070  **********************************************************************/
4071 static void
4072 igb_free_receive_buffers(struct rx_ring *rxr)
4073 {
4074         struct adapter          *adapter = rxr->adapter;
4075         struct igb_rx_buf       *rxbuf;
4076         int i;
4077
4078         INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4079
4080         if (rxr->rx_hspare_map != NULL) {
4081                 bus_dmamap_destroy(rxr->rx_htag, rxr->rx_hspare_map);
4082                 rxr->rx_hspare_map = NULL;
4083         }
4084
4085         if (rxr->rx_pspare_map != NULL) {
4086                 bus_dmamap_destroy(rxr->rx_ptag, rxr->rx_pspare_map);
4087                 rxr->rx_pspare_map = NULL;
4088         }
4089
4090         /* Cleanup any existing buffers */
4091         if (rxr->rx_buffers != NULL) {
4092                 for (i = 0; i < adapter->num_rx_desc; i++) {
4093                         rxbuf = &rxr->rx_buffers[i];
4094                         if (rxbuf->m_head != NULL) {
4095                                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
4096                                     BUS_DMASYNC_POSTREAD);
4097                                 bus_dmamap_unload(rxr->rx_htag,
4098                                     rxbuf->head_map);
4099                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4100                                 m_freem(rxbuf->m_head);
4101                         }
4102                         if (rxbuf->m_pack != NULL) {
4103                                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
4104                                     BUS_DMASYNC_POSTREAD);
4105                                 bus_dmamap_unload(rxr->rx_ptag,
4106                                     rxbuf->pack_map);
4107                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4108                                 m_freem(rxbuf->m_pack);
4109                         }
4110                         rxbuf->m_head = NULL;
4111                         rxbuf->m_pack = NULL;
4112                         if (rxbuf->head_map != NULL) {
4113                                 bus_dmamap_destroy(rxr->rx_htag,
4114                                     rxbuf->head_map);
4115                                 rxbuf->head_map = NULL;
4116                         }
4117                         if (rxbuf->pack_map != NULL) {
4118                                 bus_dmamap_destroy(rxr->rx_ptag,
4119                                     rxbuf->pack_map);
4120                                 rxbuf->pack_map = NULL;
4121                         }
4122                 }
4123                 if (rxr->rx_buffers != NULL) {
4124                         kfree(rxr->rx_buffers, M_DEVBUF);
4125                         rxr->rx_buffers = NULL;
4126                 }
4127         }
4128
4129         if (rxr->rx_htag != NULL) {
4130                 bus_dma_tag_destroy(rxr->rx_htag);
4131                 rxr->rx_htag = NULL;
4132         }
4133         if (rxr->rx_ptag != NULL) {
4134                 bus_dma_tag_destroy(rxr->rx_ptag);
4135                 rxr->rx_ptag = NULL;
4136         }
4137 }
4138
4139 static __inline void
4140 igb_rx_discard(struct rx_ring *rxr, union e1000_adv_rx_desc *cur, int i)
4141 {
4142
4143         if (rxr->fmp != NULL) {
4144                 rxr->fmp->m_flags |= M_PKTHDR;
4145                 m_freem(rxr->fmp);
4146                 rxr->fmp = NULL;
4147                 rxr->lmp = NULL;
4148         }
4149 }
4150
4151 static __inline void
4152 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4153 {
4154
4155         /*
4156          * At the moment LRO is only done for IPv4/TCP packets whose TCP
4157          * checksum has been verified by hardware and which carry no VLAN
4158          * tag in the Ethernet header.
4159          */
4160 #ifdef NET_LRO
4161         if (rxr->lro_enabled &&
4162             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4163             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4164             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4165             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4166             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4167             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4168                 /*
4169                  * Send to the stack if:
4170                  **  - LRO not enabled, or
4171                  **  - no LRO resources, or
4172                  **  - lro enqueue fails
4173                  */
4174                 if (rxr->lro.lro_cnt != 0)
4175                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4176                                 return;
4177         }
4178 #endif
4179         (*ifp->if_input)(ifp, m);
4180 }
4181
4182 /*********************************************************************
4183  *
4184  *  This routine executes in interrupt context. It replenishes
4185  *  the mbufs in the descriptor and sends data which has been
4186  *  dma'ed into host memory to upper layer.
4187  *
4188  *  We loop at most count times if count is > 0, or until done if
4189  *  count < 0.
4190  *
4191  *  Return TRUE if more to clean, FALSE otherwise
4192  *********************************************************************/
4193 static bool
4194 igb_rxeof(struct rx_ring *rxr, int count)
4195 {
4196         struct adapter          *adapter = rxr->adapter;
4197         struct ifnet            *ifp = adapter->ifp;
4198 #ifdef NET_LRO
4199         struct lro_ctrl         *lro = &rxr->lro;
4200         struct lro_entry        *queued;
4201 #endif
4202         int                     i, prog = 0;
4203         u32                     ptype, staterr = 0;
4204         union e1000_adv_rx_desc *cur;
4205
4206         IGB_RX_LOCK(rxr);
4207
4208         /* Main clean loop */
4209         for (i = rxr->next_to_check; count > 0; prog++) {
4210                 struct mbuf *sendmp, *mh, *mp;
4211                 u16 hlen, plen, hdr, vtag;
4212                 bool eop = FALSE;
4213                 u8 dopayload;
4214  
4215                 /* Sync the ring. */
4216                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4217                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4218                 cur = &rxr->rx_base[i];
4219                 staterr = le32toh(cur->wb.upper.status_error);
4220                 if ((staterr & E1000_RXD_STAT_DD) == 0)
4221                         break;
4222                 if ((ifp->if_flags & IFF_RUNNING) == 0)
4223                         break;
4224                 count--;
4225                 sendmp = mh = mp = NULL;
4226                 cur->wb.upper.status_error = 0;
4227                 plen = le16toh(cur->wb.upper.length);
4228                 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4229                 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4230                 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4231
4232                 /* Make sure all segments of a bad packet are discarded */
4233                 if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4234                     (rxr->discard)) {
4235                         ifp->if_ierrors++;
4236                         ++rxr->rx_discarded;
4237                         if (!eop) /* Catch subsequent segs */
4238                                 rxr->discard = TRUE;
4239                         else
4240                                 rxr->discard = FALSE;
4241                         igb_rx_discard(rxr, cur, i);
4242                         goto next_desc;
4243                 }
4244
4245                 /*
4246                 ** As configured, the hardware will ONLY use the
4247                 ** header buffer when header split is enabled;
4248                 ** otherwise we get normal behavior, i.e. both the
4249                 ** header and the payload are DMA'd into the
4250                 ** payload buffer.
4251                 **
4252                 ** The fmp test catches the case where a packet
4253                 ** spans multiple descriptors; only the first
4254                 ** descriptor's header is valid in that case.
4255                 */
4256                 if (rxr->hdr_split && rxr->fmp == NULL) {
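                        /*
                        ** The hardware reports the split header length
                        ** in the descriptor's hdr_info field; clamp it
                        ** to the header buffer size we actually posted.
                        */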
4257                         hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4258                             E1000_RXDADV_HDRBUFLEN_SHIFT;
4259                         if (hlen > IGB_HDR_BUF)
4260                                 hlen = IGB_HDR_BUF;
4261                         /* Handle the header mbuf */
4262                         mh = rxr->rx_buffers[i].m_head;
4263                         mh->m_len = hlen;
4264                         dopayload = IGB_CLEAN_HEADER;
4265                         /*
4266                         ** Get the payload length; this
4267                         ** could be zero if it's a small
4268                         ** packet.
4269                         */
4270                         if (plen > 0) {
4271                                 mp = rxr->rx_buffers[i].m_pack;
4272                                 mp->m_len = plen;
4273                                 mh->m_next = mp;
4274                                 dopayload = IGB_CLEAN_BOTH;
4275                                 rxr->rx_split_packets++;
4276                         }
4277                 } else {
4278                         /*
4279                         ** Either no header split, or a
4280                         ** secondary piece of a fragmented
4281                         ** split packet.
4282                         */
4283                         mh = rxr->rx_buffers[i].m_pack;
4284                         mh->m_len = plen;
4285                         dopayload = IGB_CLEAN_PAYLOAD;
4286                 }
4287
4288                 /*
4289                 ** get_buf will overwrite the writeback
4290                 ** descriptor so save the VLAN tag now.
4291                 */
4292                 vtag = le16toh(cur->wb.upper.vlan);
4293                 if (igb_get_buf(rxr, i, dopayload) != 0) {
4294                         ifp->if_iqdrops++;
4295                         /*
4296                          * We've dropped a frame due to lack of resources
4297                          * so we should drop entire multi-segmented
4298                          * frames until we encounter EOP.
4299                          */
4300                         if ((staterr & E1000_RXD_STAT_EOP) != 0)
4301                                 rxr->discard = TRUE;
4302                         igb_rx_discard(rxr, cur, i);
4303                         goto next_desc;
4304                 }
4305
4306                 /* Initial frame - setup */
4307                 if (rxr->fmp == NULL) {
4308                         mh->m_pkthdr.len = mh->m_len;
4309                         /* Store the first mbuf */
4310                         rxr->fmp = mh;
4311                         rxr->lmp = mh;
4312                         if (mp != NULL) {
4313                                 /* Add payload if split */
4314                                 mh->m_pkthdr.len += mp->m_len;
4315                                 rxr->lmp = mh->m_next;
4316                         }
4317                 } else {
4318                         /* Chain mbuf's together */
4319                         rxr->lmp->m_next = mh;
4320                         rxr->lmp = rxr->lmp->m_next;
4321                         rxr->fmp->m_pkthdr.len += mh->m_len;
4322                 }
4323
4324                 if (eop) {
4325                         rxr->fmp->m_pkthdr.rcvif = ifp;
4326                         ifp->if_ipackets++;
4327                         rxr->rx_packets++;
4328                         /* capture data for AIM (adaptive interrupt moderation) */
4329                         rxr->packets++;
4330                         rxr->bytes += rxr->fmp->m_pkthdr.len;
4331                         rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4332
4333                         if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4334                                 igb_rx_checksum(staterr, rxr->fmp, ptype);
4335                         /* XXX igb(4) always strips VLAN. */
4336                         if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4337                             (staterr & E1000_RXD_STAT_VP) != 0) {
4338                                 rxr->fmp->m_pkthdr.ether_vlantag = vtag;
4339                                 rxr->fmp->m_flags |= M_VLANTAG;
4340                         }
4341 #if __FreeBSD_version >= 800000
4342                         rxr->fmp->m_pkthdr.flowid = curcpu;
4343                         rxr->fmp->m_flags |= M_FLOWID;
4344 #endif
4345                         sendmp = rxr->fmp;
4346                         /* Make sure to set M_PKTHDR. */
4347                         sendmp->m_flags |= M_PKTHDR;
4348                         rxr->fmp = NULL;
4349                         rxr->lmp = NULL;
4350                 }
4351
4352 next_desc:
4353                 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4354                     BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4355
4356                 rxr->last_cleaned = i; /* For updating tail */
4357
4358                 /* Advance our pointers to the next descriptor. */
4359                 if (++i == adapter->num_rx_desc)
4360                         i = 0;
4361  
4362                 /*
4363                 ** Note that we hold the RX lock through
4364                 ** the following call, so this ring's
4365                 ** next_to_check cannot change underneath us.
4366                 */
4367                 if (sendmp != NULL)
4368                         igb_rx_input(rxr, ifp, sendmp, ptype);
4369         }
4370
4371         if (prog == 0) {
4372                 IGB_RX_UNLOCK(rxr);
4373                 return (FALSE);
4374         }
4375
4376         rxr->next_to_check = i;
4377
4378         /* Advance the E1000's Receive Queue "Tail Pointer". */
4379         E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4380
4381         /*
4382          * Flush any outstanding LRO work
4383          */
4384 #ifdef NET_LRO
4385         while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4386                 SLIST_REMOVE_HEAD(&lro->lro_active, next);
4387                 tcp_lro_flush(lro, queued);
4388         }
4389 #endif
4390
4391         IGB_RX_UNLOCK(rxr);
4392
4393         /*
4394         ** If there is still cleaning to do, return TRUE so
4395         ** the caller schedules another pass.
4396         */
4397         if ((staterr & E1000_RXD_STAT_DD) != 0)
4398                 return (TRUE);
4399
4400         return (FALSE);
4401 }
4402
4403 /*********************************************************************
4404  *
4405  *  Verify that the hardware indicated that the checksum is valid.
4406  *  Inform the stack of the checksum status so that the stack
4407  *  doesn't spend time verifying the checksum itself.
4408  *
4409  *********************************************************************/
4410 static void
4411 igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4412 {
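        /*
         * The advanced descriptor's status_error word packs the status
         * bits into its low 16 bits and the error bits into its top
         * byte; split them apart before testing.
         */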
4413         u16 status = (u16)staterr;
4414         u8  errors = (u8) (staterr >> 24);
4415         int sctp;
4416
4417         /* Ignore Checksum Indication (IXSM) bit is set: report nothing */
4418         if (status & E1000_RXD_STAT_IXSM) {
4419                 mp->m_pkthdr.csum_flags = 0;
4420                 return;
4421         }
4422
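        /*
         * SCTP frames are identified via the packet-type field, unless
         * an ETQF filter claimed the packet instead.
         */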
4423         if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4424             (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4425                 sctp = 1;
4426         else
4427                 sctp = 0;
4428         if (status & E1000_RXD_STAT_IPCS) {
4429                 /* Did it pass? */
4430                 if (!(errors & E1000_RXD_ERR_IPE)) {
4431                         /* IP Checksum Good */
4432                         mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4433                         mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4434                 } else
4435                         mp->m_pkthdr.csum_flags = 0;
4436         }
4437
4438         if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4439                 u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4440 #if __FreeBSD_version >= 800000
4441                 if (sctp) /* reassign */
4442                         type = CSUM_SCTP_VALID;
4443 #endif
4444                 /* Did it pass? */
4445                 if (!(errors & E1000_RXD_ERR_TCPE)) {
4446                         mp->m_pkthdr.csum_flags |= type;
4447                         if (sctp == 0)
4448                                 mp->m_pkthdr.csum_data = htons(0xffff);
4449                 }
4450         }
4451         return;
4452 }
4453
4454 /*
4455  * This routine is run via a vlan
4456  * config EVENT
4457  */
4458 static void
4459 igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4460 {
4461         struct adapter  *adapter = ifp->if_softc;
4462         u32             index, bit;
4463
4464         if (ifp->if_softc !=  arg)   /* Not our event */
4465                 return;
4466
4467         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4468                 return;
4469
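        /*
         * The VLAN filter table holds 4096 bits as 128 32-bit words:
         * the upper bits of the tag select the word and the low five
         * bits select the bit, e.g. vtag 100 -> word 3 (100 >> 5),
         * bit 4 (100 & 0x1f).
         */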
4470         index = (vtag >> 5) & 0x7F;
4471         bit = vtag & 0x1F;
4472         igb_shadow_vfta[index] |= (1 << bit);
4473         ++adapter->num_vlans;
4474         /* Re-init to load the changes */
4475         igb_init(adapter);
4476 }
4477
4478 /*
4479  * This routine is run via a vlan
4480  * unconfig EVENT
4481  */
4482 static void
4483 igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4484 {
4485         struct adapter  *adapter = ifp->if_softc;
4486         u32             index, bit;
4487
4488         if (ifp->if_softc !=  arg)
4489                 return;
4490
4491         if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4492                 return;
4493
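        /* Same word/bit mapping as igb_register_vlan(), but clear the bit. */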
4494         index = (vtag >> 5) & 0x7F;
4495         bit = vtag & 0x1F;
4496         igb_shadow_vfta[index] &= ~(1 << bit);
4497         --adapter->num_vlans;
4498         /* Re-init to load the changes */
4499         igb_init(adapter);
4500 }
4501
4502 static void
4503 igb_setup_vlan_hw_support(struct adapter *adapter)
4504 {
4505         struct e1000_hw *hw = &adapter->hw;
4506         u32             reg;
4507
4508         /*
4509         ** We get here through init_locked, i.e. after
4510         ** a soft reset that has already cleared the
4511         ** VFTA and other state, so if no vlans have
4512         ** been registered there is nothing to do.
4513         */
4514         if (adapter->num_vlans == 0)
4515                 return;
4516
4517         /*
4518         ** A soft reset zeroes out the VFTA, so
4519         ** we need to repopulate it now.
4520         */
4521         for (int i = 0; i < IGB_VFTA_SIZE; i++)
4522                 if (igb_shadow_vfta[i] != 0)
4523                         E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4524                             i, igb_shadow_vfta[i]);
4525
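        /* CTRL.VME turns on 802.1Q VLAN tag handling in the MAC. */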
4526         reg = E1000_READ_REG(hw, E1000_CTRL);
4527         reg |= E1000_CTRL_VME;
4528         E1000_WRITE_REG(hw, E1000_CTRL, reg);
4529
4530         /* Enable the Filter Table */
4531         reg = E1000_READ_REG(hw, E1000_RCTL);
4532         reg &= ~E1000_RCTL_CFIEN;
4533         reg |= E1000_RCTL_VFE;
4534         E1000_WRITE_REG(hw, E1000_RCTL, reg);
4535
4536         /* Update the frame size */
4537         E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4538             adapter->max_frame_size + VLAN_TAG_SIZE);
4539 }
4540
4541 static void
4542 igb_enable_intr(struct adapter *adapter)
4543 {
4544         /* With RSS set up what to auto clear */
4545         if (adapter->msix_mem) {
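                /*
                ** EIAC selects which extended causes auto-clear, EIAM
                ** which auto-mask, and EIMS unmasks them; link state
                ** changes still arrive through the legacy IMS (LSC).
                */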
4546                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4547                     adapter->eims_mask);
4548                 E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4549                     adapter->eims_mask);
4550                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4551                     adapter->eims_mask);
4552                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4553                     E1000_IMS_LSC);
4554         } else {
4555                 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4556                     IMS_ENABLE_MASK);
4557         }
4558         E1000_WRITE_FLUSH(&adapter->hw);
4559
4560         return;
4561 }
4562
4563 static void
4564 igb_disable_intr(struct adapter *adapter)
4565 {
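        /* Mask the extended (MSI-X) causes if present, then the legacy set. */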
4566         if (adapter->msix_mem) {
4567                 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4568                 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4569         } 
4570         E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4571         E1000_WRITE_FLUSH(&adapter->hw);
4572         return;
4573 }
4574
4575 /*
4576  * Bit of a misnomer: what this really means is
4577  * to enable OS management of the system, i.e.
4578  * to disable the special hardware management features.
4579  */
4580 static void
4581 igb_init_manageability(struct adapter *adapter)
4582 {
4583         if (adapter->has_manage) {
4584                 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4585                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4586
4587                 /* disable hardware interception of ARP */
4588                 manc &= ~(E1000_MANC_ARP_EN);
4589
4590                 /* enable receiving management packets to the host */
4591                 manc |= E1000_MANC_EN_MNG2HOST;
4592                 manc2h |= 1 << 5;  /* Mng Port 623 */
4593                 manc2h |= 1 << 6;  /* Mng Port 664 */
4594                 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4595                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4596         }
4597 }
4598
4599 /*
4600  * Give control back to hardware management
4601  * controller if there is one.
4602  */
4603 static void
4604 igb_release_manageability(struct adapter *adapter)
4605 {
4606         if (adapter->has_manage) {
4607                 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4608
4609                 /* re-enable hardware interception of ARP */
4610                 manc |= E1000_MANC_ARP_EN;
4611                 manc &= ~E1000_MANC_EN_MNG2HOST;
4612
4613                 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4614         }
4615 }
4616
4617 /*
4618  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4619  * For ASF and Pass Through versions of f/w this means that
4620  * the driver is loaded. 
4621  *
4622  */
4623 static void
4624 igb_get_hw_control(struct adapter *adapter)
4625 {
4626         u32 ctrl_ext;
4627
4628         /* Let firmware know the driver has taken over */
4629         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4630         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4631             ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4632 }
4633
4634 /*
4635  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4636  * For ASF and Pass Through versions of f/w this means that the
4637  * driver is no longer loaded.
4638  *
4639  */
4640 static void
4641 igb_release_hw_control(struct adapter *adapter)
4642 {
4643         u32 ctrl_ext;
4644
4645         /* Let firmware take over control of h/w */
4646         ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4647         E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4648             ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4649 }
4650
4651 static int
4652 igb_is_valid_ether_addr(uint8_t *addr)
4653 {
4654         char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4655
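        /* Reject group addresses (low bit of first octet) and all-zero. */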
4656         if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4657                 return (FALSE);
4658         }
4659
4660         return (TRUE);
4661 }
4662
4663
4664 /*
4665  * Enable PCI Wake On Lan capability
4666  */
4667 void
4668 igb_enable_wakeup(device_t dev)
4669 {
4670         u16     cap, status;
4671         u8      id;
4672
4673         /* First find the capabilities pointer */
4674         cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4675         /* Read the PM Capabilities */
4676         id = pci_read_config(dev, cap, 1);
4677         if (id != PCIY_PMG)     /* Something wrong */
4678                 return;
4679         /* OK, we have the power capabilities, so
4680            now get the status register */
4681         cap += PCIR_POWER_STATUS;
4682         status = pci_read_config(dev, cap, 2);
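        /*
         * PME_STATUS is write-one-to-clear, so setting it clears any
         * stale wake event; PME_EN arms PME# generation on wake.
         */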
4683         status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4684         pci_write_config(dev, cap, status, 2);
4685         return;
4686 }
4687
4688
4689 /**********************************************************************
4690  *
4691  *  Update the board statistics counters.
4692  *
4693  **********************************************************************/
4694 static void
4695 igb_update_stats_counters(struct adapter *adapter)
4696 {
4697         struct ifnet   *ifp;
4698
4699         if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4700            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4701                 adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4702                 adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4703         }
4704         adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4705         adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4706         adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4707         adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4708
4709         adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4710         adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4711         adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4712         adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4713         adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4714         adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4715         adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4716         adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4717         adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4718         adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4719         adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4720         adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4721         adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4722         adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4723         adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4724         adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4725         adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4726         adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4727         adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4728         adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4729
4730         /* For the 64-bit byte counters the low dword must be read first. */
4731         /* Both registers clear on the read of the high dword */
4732
4733         adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4734         adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4735
4736         adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4737         adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4738         adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4739         adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4740         adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4741
4742         adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4743         adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4744
4745         adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4746         adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4747         adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4748         adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4749         adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4750         adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4751         adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4752         adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4753         adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4754         adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4755
4756         adapter->stats.algnerrc += 
4757                 E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4758         adapter->stats.rxerrc += 
4759                 E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4760         adapter->stats.tncrs += 
4761                 E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4762         adapter->stats.cexterr += 
4763                 E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4764         adapter->stats.tsctc += 
4765                 E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4766         adapter->stats.tsctfc += 
4767                 E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4768         ifp = adapter->ifp;
4769
4770         ifp->if_collisions = adapter->stats.colc;
4771
4772         /* Rx Errors */
4773         ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4774             adapter->stats.crcerrs + adapter->stats.algnerrc +
4775             adapter->stats.ruc + adapter->stats.roc +
4776             adapter->stats.mpc + adapter->stats.cexterr;
4777
4778         /* Tx Errors */
4779         ifp->if_oerrors = adapter->stats.ecol +
4780             adapter->stats.latecol + adapter->watchdog_events;
4781 }
4782
4783
4784 /**********************************************************************
4785  *
4786  *  This routine is called only when igb_display_debug_stats is enabled.
4787  *  This routine provides a way to take a look at important statistics
4788  *  maintained by the driver and hardware.
4789  *
4790  **********************************************************************/
4791 static void
4792 igb_print_debug_info(struct adapter *adapter)
4793 {
4794         device_t dev = adapter->dev;
4795         struct igb_queue *que = adapter->queues;
4796         struct rx_ring *rxr = adapter->rx_rings;
4797         struct tx_ring *txr = adapter->tx_rings;
4798         uint8_t *hw_addr = adapter->hw.hw_addr;
4799
4800         device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4801         device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4802             E1000_READ_REG(&adapter->hw, E1000_CTRL),
4803             E1000_READ_REG(&adapter->hw, E1000_RCTL));
4804
4805 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4806         device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4807             E1000_READ_REG(&adapter->hw, E1000_IMS),
4808             E1000_READ_REG(&adapter->hw, E1000_EIMS));
4809 #endif
4810
4811         device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4812             ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4813             (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4814         device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4815             adapter->hw.fc.high_water,
4816             adapter->hw.fc.low_water);
4817
4818         for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4819                 device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
4820                     E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4821                     E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4822                 device_printf(dev, "rdh = %d, rdt = %d\n",
4823                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4824                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4825                 device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4826                     txr->me, (long long)txr->no_desc_avail);
4827                 device_printf(dev, "TX(%d) Packets sent = %lld\n",
4828                     txr->me, (long long)txr->tx_packets);
4829                 device_printf(dev, "RX(%d) Packets received = %lld  ",
4830                     rxr->me, (long long)rxr->rx_packets);
4831         }
4832         rxr = adapter->rx_rings; /* rewind; the first loop advanced it */
4833         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4834 #ifdef NET_LRO
4835                 struct lro_ctrl *lro = &rxr->lro;
4836 #endif
4837                 device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4838                     E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4839                     E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4840                 device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4841                     (long long)rxr->rx_packets);
4842                 device_printf(dev, " Split Packets = %lld ",
4843                     (long long)rxr->rx_split_packets);
4844                 device_printf(dev, " Byte count = %lld\n",
4845                     (long long)rxr->rx_bytes);
4846 #ifdef NET_LRO
4847                 device_printf(dev,"RX(%d) LRO Queued= %d  ",
4848                     i, lro->lro_queued);
4849                 device_printf(dev,"LRO Flushed= %d\n",lro->lro_flushed);
4850 #endif
4851         }
4852
4853         for (int i = 0; i < adapter->num_queues; i++, que++)
4854                 device_printf(dev,"QUE(%d) IRQs = %llx\n",
4855                     i, (long long)que->irqs);
4856
4857         device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4858         device_printf(dev, "Mbuf defrag failed = %ld\n",
4859             adapter->mbuf_defrag_failed);
4860         device_printf(dev, "Std mbuf header failed = %ld\n",
4861             adapter->mbuf_header_failed);
4862         device_printf(dev, "Std mbuf packet failed = %ld\n",
4863             adapter->mbuf_packet_failed);
4864         device_printf(dev, "Driver dropped packets = %ld\n",
4865             adapter->dropped_pkts);
4866         device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4867                 adapter->no_tx_dma_setup);
4868 }
4869
4870 static void
4871 igb_print_hw_stats(struct adapter *adapter)
4872 {
4873         device_t dev = adapter->dev;
4874
4875         device_printf(dev, "Excessive collisions = %lld\n",
4876             (long long)adapter->stats.ecol);
4877 #if     (DEBUG_HW > 0)  /* Don't output these errors normally */
4878         device_printf(dev, "Symbol errors = %lld\n",
4879             (long long)adapter->stats.symerrs);
4880 #endif
4881         device_printf(dev, "Sequence errors = %lld\n",
4882             (long long)adapter->stats.sec);
4883         device_printf(dev, "Defer count = %lld\n",
4884             (long long)adapter->stats.dc);
4885         device_printf(dev, "Missed Packets = %lld\n",
4886             (long long)adapter->stats.mpc);
4887         device_printf(dev, "Receive No Buffers = %lld\n",
4888             (long long)adapter->stats.rnbc);
4889         /* RLEC is inaccurate on some hardware, calculate our own. */
4890         device_printf(dev, "Receive Length Errors = %lld\n",
4891             ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4892         device_printf(dev, "Receive errors = %lld\n",
4893             (long long)adapter->stats.rxerrc);
4894         device_printf(dev, "Crc errors = %lld\n",
4895             (long long)adapter->stats.crcerrs);
4896         device_printf(dev, "Alignment errors = %lld\n",
4897             (long long)adapter->stats.algnerrc);
4898         /* On 82575 these are collision counts */
4899         device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4900             (long long)adapter->stats.cexterr);
4901         device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4902         device_printf(dev, "watchdog timeouts = %ld\n",
4903             adapter->watchdog_events);
4904         device_printf(dev, "XON Rcvd = %lld\n",
4905             (long long)adapter->stats.xonrxc);
4906         device_printf(dev, "XON Xmtd = %lld\n",
4907             (long long)adapter->stats.xontxc);
4908         device_printf(dev, "XOFF Rcvd = %lld\n",
4909             (long long)adapter->stats.xoffrxc);
4910         device_printf(dev, "XOFF Xmtd = %lld\n",
4911             (long long)adapter->stats.xofftxc);
4912         device_printf(dev, "Good Packets Rcvd = %lld\n",
4913             (long long)adapter->stats.gprc);
4914         device_printf(dev, "Good Packets Xmtd = %lld\n",
4915             (long long)adapter->stats.gptc);
4916         device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4917             (long long)adapter->stats.tsctc);
4918         device_printf(dev, "TSO Contexts Failed = %lld\n",
4919             (long long)adapter->stats.tsctfc);
4920 }
4921
4922 /**********************************************************************
4923  *
4924  *  This routine provides a way to dump out the adapter eeprom,
4925  *  often a useful debug/service tool. This only dumps the first
4926  *  32 words, stuff that matters is in that extent.
4927  *  32 words; the stuff that matters is within that extent.
4928  **********************************************************************/
4929 static void
4930 igb_print_nvm_info(struct adapter *adapter)
4931 {
4932         u16     eeprom_data;
4933         int     i, j, row = 0;
4934
4935         /* It's a bit crude, but it gets the job done */
4936         kprintf("\nInterface EEPROM Dump:\n");
4937         kprintf("Offset\n0x0000  ");
4938         for (i = 0, j = 0; i < 32; i++, j++) {
4939                 if (j == 8) { /* Make the offset block */
4940                         j = 0; ++row;
4941                         kprintf("\n0x00%x0  ",row);
4942                 }
4943                 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4944                 kprintf("%04x ", eeprom_data);
4945         }
4946         kprintf("\n");
4947 }
4948
4949 static int
4950 igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4951 {
4952         struct adapter *adapter;
4953         int error;
4954         int result;
4955
4956         result = -1;
4957         error = sysctl_handle_int(oidp, &result, 0, req);
4958
4959         if (error || !req->newptr)
4960                 return (error);
4961
4962         if (result == 1) {
4963                 adapter = (struct adapter *)arg1;
4964                 igb_print_debug_info(adapter);
4965         }
4966         /*
4967          * This value will cause a hex dump of the
4968          * first 32 16-bit words of the EEPROM to
4969          * the screen.
4970          */
4971         if (result == 2) {
4972                 adapter = (struct adapter *)arg1;
4973                 igb_print_nvm_info(adapter);
4974         }
4975
4976         return (error);
4977 }
4978
4979
4980 static int
4981 igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4982 {
4983         struct adapter *adapter;
4984         int error;
4985         int result;
4986
4987         result = -1;
4988         error = sysctl_handle_int(oidp, &result, 0, req);
4989
4990         if (error || !req->newptr)
4991                 return (error);
4992
4993         if (result == 1) {
4994                 adapter = (struct adapter *)arg1;
4995                 igb_print_hw_stats(adapter);
4996         }
4997
4998         return (error);
4999 }
5000
5001 static void
5002 igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5003         const char *description, int *limit, int value)
5004 {
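        /* Record the default and expose it as a writable integer sysctl. */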
5005         *limit = value;
5006         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
5007             SYSCTL_CHILDREN(adapter->sysctl_tree),
5008             OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5009 }