Unbreak buildkernel / mxge(4)
[dragonfly.git] / sys / dev / netif / e1000 / if_igb.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2010, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33
34
35 #ifdef HAVE_KERNEL_OPTION_HEADERS
36 #include "opt_device_polling.h"
37 #include "opt_inet.h"
38 #include "opt_altq.h"
39 #endif
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #if __FreeBSD_version >= 800000
44 #include <sys/buf_ring.h>
45 #endif
46 #include <sys/bus.h>
47 #include <sys/endian.h>
48 #include <sys/kernel.h>
49 #include <sys/kthread.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/module.h>
53 #include <sys/rman.h>
54 #include <sys/socket.h>
55 #include <sys/sockio.h>
56 #include <sys/sysctl.h>
57 #include <sys/taskqueue.h>
58 #include <sys/eventhandler.h>
59
60 #ifdef IGB_IEEE1588
61 #include <sys/ieee1588.h>
62 #endif
63
64 #include <net/bpf.h>
65 #include <net/ethernet.h>
66 #include <net/if.h>
67 #include <net/if_arp.h>
68 #include <net/if_dl.h>
69 #include <net/if_media.h>
70 #include <net/ifq_var.h>
71
72 #include <net/if_types.h>
73 #include <net/vlan/if_vlan_var.h>
74 #include <net/vlan/if_vlan_ether.h>
75
76 #include <netinet/in_systm.h>
77 #include <netinet/in.h>
78 #include <netinet/if_ether.h>
79 #include <netinet/ip.h>
80 #include <netinet/ip6.h>
81 #include <netinet/tcp.h>
82 #ifdef NET_LRO
83 #include <netinet/tcp_lro.h>
84 #endif
85 #include <netinet/udp.h>
86
87 #include <sys/in_cksum.h>
88 #include <bus/pci/pcivar.h>
89 #include <bus/pci/pcireg.h>
90
91 #include "e1000_api.h"
92 #include "e1000_82575.h"
93 #include "if_igb.h"
94 #include "ifcap_defines.h" // XXX
95
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.9.1";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

/* 0x8086 is the Intel PCI vendor id; matched again in igb_probe(). */
static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *  Indexed by the last field of igb_vendor_info_array entries.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
150
151 /*********************************************************************
152  *  Function prototypes
153  *********************************************************************/
154 static int      igb_probe(device_t);
155 static int      igb_attach(device_t);
156 static int      igb_detach(device_t);
157 static int      igb_shutdown(device_t);
158 static int      igb_suspend(device_t);
159 static int      igb_resume(device_t);
160 static void     igb_start(struct ifnet *);
161 static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
162 #if __FreeBSD_version >= 800000
163 static int      igb_mq_start(struct ifnet *, struct mbuf *);
164 static int      igb_mq_start_locked(struct ifnet *,
165                     struct tx_ring *, struct mbuf *);
166 static void     igb_qflush(struct ifnet *);
167 #endif
168 static int      igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
169 static void     igb_init(void *);
170 static void     igb_init_locked(struct adapter *);
171 static void     igb_stop(void *);
172 static void     igb_media_status(struct ifnet *, struct ifmediareq *);
173 static int      igb_media_change(struct ifnet *);
174 static void     igb_identify_hardware(struct adapter *);
175 static int      igb_allocate_pci_resources(struct adapter *);
176 static int      igb_allocate_msix(struct adapter *);
177 static int      igb_allocate_legacy(struct adapter *);
178 static int      igb_setup_msix(struct adapter *);
179 static void     igb_free_pci_resources(struct adapter *);
180 static void     igb_local_timer(void *);
181 static void     igb_reset(struct adapter *);
182 static void     igb_setup_interface(device_t, struct adapter *);
183 static int      igb_allocate_queues(struct adapter *);
184 static void     igb_configure_queues(struct adapter *);
185
186 static int      igb_allocate_transmit_buffers(struct tx_ring *);
187 static void     igb_setup_transmit_structures(struct adapter *);
188 static void     igb_setup_transmit_ring(struct tx_ring *);
189 static void     igb_initialize_transmit_units(struct adapter *);
190 static void     igb_free_transmit_structures(struct adapter *);
191 static void     igb_free_transmit_buffers(struct tx_ring *);
192
193 static int      igb_allocate_receive_buffers(struct rx_ring *);
194 static int      igb_setup_receive_structures(struct adapter *);
195 static int      igb_setup_receive_ring(struct rx_ring *);
196 static void     igb_initialize_receive_units(struct adapter *);
197 static void     igb_free_receive_structures(struct adapter *);
198 static void     igb_free_receive_buffers(struct rx_ring *);
199 static void     igb_free_receive_ring(struct rx_ring *);
200
201 static void     igb_enable_intr(struct adapter *);
202 static void     igb_disable_intr(struct adapter *);
203 static void     igb_update_stats_counters(struct adapter *);
204 static bool     igb_txeof(struct tx_ring *);
205
206 static __inline void igb_rx_discard(struct rx_ring *,
207                     union e1000_adv_rx_desc *, int);
208 static __inline void igb_rx_input(struct rx_ring *,
209                     struct ifnet *, struct mbuf *, u32);
210
211 static bool     igb_rxeof(struct rx_ring *, int);
212 static void     igb_rx_checksum(u32, struct mbuf *, u32);
213 static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
214 #if NET_TSO 
215 static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
216 #endif
217 static void     igb_set_promisc(struct adapter *);
218 static void     igb_disable_promisc(struct adapter *);
219 static void     igb_set_multi(struct adapter *);
220 static void     igb_print_hw_stats(struct adapter *);
221 static void     igb_update_link_status(struct adapter *);
222 static int      igb_get_buf(struct rx_ring *, int, u8);
223
224 static void     igb_register_vlan(void *, struct ifnet *, u16);
225 static void     igb_unregister_vlan(void *, struct ifnet *, u16);
226 static void     igb_setup_vlan_hw_support(struct adapter *);
227
228 static int      igb_xmit(struct tx_ring *, struct mbuf **);
229 static int      igb_dma_malloc(struct adapter *, bus_size_t,
230                     struct igb_dma_alloc *, int);
231 static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
232 static void     igb_print_debug_info(struct adapter *);
233 static void     igb_print_nvm_info(struct adapter *);
234 static int      igb_is_valid_ether_addr(u8 *);
235 static int      igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
236 static int      igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
237 /* Management and WOL Support */
238 static void     igb_init_manageability(struct adapter *);
239 static void     igb_release_manageability(struct adapter *);
240 static void     igb_get_hw_control(struct adapter *);
241 static void     igb_release_hw_control(struct adapter *);
242 static void     igb_enable_wakeup(device_t);
243
244 static void     igb_irq_fast(void *);
245 static void     igb_add_rx_process_limit(struct adapter *, const char *,
246                     const char *, int *, int);
247 static void     igb_handle_rxtx(void *context, int pending);
248 static void     igb_handle_que(void *context, int pending);
249 static void     igb_handle_link(void *context, int pending);
250
251 /* These are MSIX only irq handlers */
252 static void     igb_msix_que(void *);
253 static void     igb_msix_link(void *);
254
255 #ifdef DEVICE_POLLING
256 static poll_handler_t igb_poll;
257 #endif /* POLLING */
258
/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}	/* required table terminator */
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
/* Attach to the PCI bus; depend on pci and ether so load order is right. */
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
282
283 /*********************************************************************
284  *  Tunable default values.
285  *********************************************************************/
286
287 /* Descriptor defaults */
288 static int igb_rxd = IGB_DEFAULT_RXD;
289 static int igb_txd = IGB_DEFAULT_TXD;
290 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
291 TUNABLE_INT("hw.igb.txd", &igb_txd);
292
293 /*
294 ** AIM: Adaptive Interrupt Moderation
295 ** which means that the interrupt rate
296 ** is varied over time based on the
297 ** traffic for that interrupt vector
298 */
299 static int igb_enable_aim = TRUE;
300 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
301
302 /*
303  * MSIX should be the default for best performance,
304  * but this allows it to be forced off for testing.
305  */         
306 static int igb_enable_msix = 1;
307 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
308
309 /*
310  * Header split has seemed to be beneficial in
311  * many circumstances tested, however there have
312  * been some stability issues, so the default is
313  * off. 
314  */
315 static bool igb_header_split = FALSE;
316 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
317
318 /*
319 ** This will autoconfigure based on
320 ** the number of CPUs if left at 0.
321 */
322 static int igb_num_queues = 0;
323 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
324
325 /* How many packets rxeof tries to clean at a time */
326 static int igb_rx_process_limit = 100;
327 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
328
329 /* Flow control setting - default to FULL */
330 static int igb_fc_setting = e1000_fc_full;
331 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
332
333 /*
334 ** Shadow VFTA table, this is needed because
335 ** the real filter table gets cleared during
336 ** a soft reset and the driver needs to be able
337 ** to repopulate it.
338 */
339 static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
340
341
342 /*********************************************************************
343  *  Device identification routine
344  *
345  *  igb_probe determines if the driver should be loaded on
346  *  adapter based on PCI vendor/device id of the adapter.
347  *
348  *  return BUS_PROBE_DEFAULT on success, positive on failure
349  *********************************************************************/
350
351 static int
352 igb_probe(device_t dev)
353 {
354         char            adapter_name[60];
355         uint16_t        pci_vendor_id = 0;
356         uint16_t        pci_device_id = 0;
357         uint16_t        pci_subvendor_id = 0;
358         uint16_t        pci_subdevice_id = 0;
359         igb_vendor_info_t *ent;
360
361         INIT_DEBUGOUT("igb_probe: begin");
362
363         pci_vendor_id = pci_get_vendor(dev);
364         if (pci_vendor_id != IGB_VENDOR_ID)
365                 return (ENXIO);
366
367         pci_device_id = pci_get_device(dev);
368         pci_subvendor_id = pci_get_subvendor(dev);
369         pci_subdevice_id = pci_get_subdevice(dev);
370
371         ent = igb_vendor_info_array;
372         while (ent->vendor_id != 0) {
373                 if ((pci_vendor_id == ent->vendor_id) &&
374                     (pci_device_id == ent->device_id) &&
375
376                     ((pci_subvendor_id == ent->subvendor_id) ||
377                     (ent->subvendor_id == PCI_ANY_ID)) &&
378
379                     ((pci_subdevice_id == ent->subdevice_id) ||
380                     (ent->subdevice_id == PCI_ANY_ID))) {
381                         ksprintf(adapter_name, "%s %s",
382                                 igb_strings[ent->index],
383                                 igb_driver_version);
384                         device_set_desc_copy(dev, adapter_name);
385                         return (BUS_PROBE_DEFAULT);
386                 }
387                 ent++;
388         }
389
390         return (ENXIO);
391 }
392
393 /*********************************************************************
394  *  Device initialization routine
395  *
396  *  The attach entry point is called when the driver is being loaded.
397  *  This routine identifies the type of hardware, allocates all resources
398  *  and initializes the hardware.
399  *
400  *  return 0 on success, positive on failure
401  *********************************************************************/
402
static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff: per-device node under hw.<unit> */
	sysctl_ctx_init(&adapter->sysctl_ctx);
	adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
					SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
					device_get_nameunit(adapter->dev),
					CTLFLAG_RD, 0, "");
	if (adapter->sysctl_tree == NULL) {
		device_printf(adapter->dev, "can't add sysctl node\n");
		error = ENOMEM;
		goto err_sysctl;
	}

	SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	/* NOTE(review): these two export the driver-global tunables,
	 * so the knobs are shared by all igb instances. */
	SYSCTL_ADD_INT(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	SYSCTL_ADD_INT(&adapter->sysctl_ctx,
	    SYSCTL_CHILDREN(adapter->sysctl_tree),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	callout_init(&adapter->timer);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.  Bad values fall back to the defaults.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	** mac from that.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state, call it again,
		** if it fails a second time its a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts: MSI-X when the bus gave us more than
	** one vector and the tunable allows it, else MSI/legacy.
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

	/*
	 * Error unwind: each label releases everything acquired before
	 * the jump; later labels fall through to earlier ones.
	 */
err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
err_sysctl:
	sysctl_ctx_free(&adapter->sysctl_ctx);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}
621
622 /*********************************************************************
623  *  Device removal routine
624  *
625  *  The detach entry point is called when the driver is being removed.
626  *  This routine stops the adapter and deallocates all the resources
627  *  that were allocated for driver operation.
628  *
629  *  return 0 on success, positive on failure
630  *********************************************************************/
631
static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunks != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (adapter->ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(adapter->ifp);
#endif

	/* Mark in_detach under the core lock so concurrent paths see it */
	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	/* Arm Wake-on-LAN before the device is powered down */
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);

	/* NOTE(review): callout_drain() not used here; callout_stop()
	 * relies on igb_stop()/in_detach having quiesced the timer. */
	//callout_drain(&adapter->timer);
	callout_stop(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	sysctl_ctx_free(&adapter->sysctl_ctx);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}
689
690 /*********************************************************************
691  *
692  *  Shutdown entry point
693  *
694  **********************************************************************/
695
696 static int
697 igb_shutdown(device_t dev)
698 {
699         return igb_suspend(dev);
700 }
701
702 /*
703  * Suspend/resume device methods.
704  */
/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	/* Quiesce the hardware before handing control back */
	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	/* Arm Wake-on-LAN if a wake filter was configured at attach */
	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}
727
728 static int
729 igb_resume(device_t dev)
730 {
731         struct adapter *adapter = device_get_softc(dev);
732         struct ifnet *ifp = adapter->ifp;
733
734         IGB_CORE_LOCK(adapter);
735         igb_init_locked(adapter);
736         igb_init_manageability(adapter);
737
738         if ((ifp->if_flags & IFF_UP) &&
739             (ifp->if_flags & IFF_RUNNING))
740                 igb_start(ifp);
741
742         IGB_CORE_UNLOCK(adapter);
743
744         return bus_generic_resume(dev);
745 }
746
747
748 /*********************************************************************
749  *  Transmit entry point
750  *
751  *  igb_start is called by the stack to initiate a transmit.
752  *  The driver will remain in this routine as long as there are
753  *  packets to transmit and transmit resources are available.
754  *  In case resources are not available stack is notified and
755  *  the packet is requeued.
756  **********************************************************************/
757
static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	/* Only transmit when running, not flow-blocked, and link is up */
	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
	    IFF_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Drain the interface send queue into the TX ring */
	while (!ifq_is_empty(&ifp->if_snd)) {

		m_head = ifq_dequeue(&ifp->if_snd, NULL);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			/* Ring full: mark busy and push the mbuf back */
			ifp->if_flags |= IFF_OACTIVE;
			ifq_prepend(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_check = TRUE;
	}
}
796  
797 /*
798  * Legacy TX driver routine, called from the
799  * stack, always uses tx[0], and spins for it.
800  * Should not be used with multiqueue tx
801  */
802 static void
803 igb_start(struct ifnet *ifp)
804 {
805         struct adapter  *adapter = ifp->if_softc;
806         struct tx_ring  *txr = adapter->tx_rings;
807
808         if (ifp->if_flags & IFF_RUNNING) {
809                 IGB_TX_LOCK(txr);
810                 igb_start_locked(txr, ifp);
811                 IGB_TX_UNLOCK(txr);
812         }
813         return;
814 }
815
816 #if __FreeBSD_version >= 800000
817 /*
818 ** Multiqueue Transmit driver
819 **
820 */
821 static int
822 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
823 {
824         struct adapter  *adapter = ifp->if_softc;
825         struct tx_ring  *txr;
826         int             i = 0, err = 0;
827
828         /* Which queue to use */
829         if ((m->m_flags & M_FLOWID) != 0)
830                 i = m->m_pkthdr.flowid % adapter->num_queues;
831         txr = &adapter->tx_rings[i];
832
833         if (IGB_TX_TRYLOCK(txr)) {
834                 err = igb_mq_start_locked(ifp, txr, m);
835                 IGB_TX_UNLOCK(txr);
836         } else
837                 err = drbr_enqueue(ifp, txr->br, m);
838
839         return (err);
840 }
841
/*
 * Multiqueue transmit worker; the TX ring lock must be held (asserted).
 * A NULL 'm' means "service whatever is already on the buf_ring".
 * Returns 0 on success or an errno from igb_xmit()/drbr_enqueue().
 */
static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	/*
	 * Not running, throttled, or no link: park the frame on the
	 * buf_ring instead of transmitting (or dropping) it.
	 */
	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) !=
	    IFF_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	/* enq counts frames handed to the hardware in this call */
	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		/* Preserve ordering: go behind frames already queued */
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;
	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			/*
			 * igb_xmit() may free the mbuf and NULL the
			 * pointer on failure; only requeue a survivor.
			 */
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		/* Copy to BPF listeners, if any */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_flags & IFF_RUNNING) == 0)
			break;
		/* Throttle the stack when descriptors run low */
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			ifp->if_flags |= IFF_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->watchdog_check = TRUE;
	}
	return (err);
}
891
892 /*
893 ** Flush all ring buffers
894 */
895 static void
896 igb_qflush(struct ifnet *ifp)
897 {
898         struct adapter  *adapter = ifp->if_softc;
899         struct tx_ring  *txr = adapter->tx_rings;
900         struct mbuf     *m;
901
902         for (int i = 0; i < adapter->num_queues; i++, txr++) {
903                 IGB_TX_LOCK(txr);
904                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
905                         m_freem(m);
906                 IGB_TX_UNLOCK(txr);
907         }
908         if_qflush(ifp);
909 }
910 #endif /* __FreeBSD_version >= 800000 */
911
912 /*********************************************************************
913  *  Ioctl entry point
914  *
915  *  igb_ioctl is called when the user wants to configure the
916  *  interface.
917  *
918  *  return 0 on success, positive on failure
919  **********************************************************************/
920
static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cred)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	/* Ignore requests that race against device detach */
	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation we only
			 * initialize the hardware only when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_flags & IFF_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		/* 9234 is the largest on-wire frame the hardware accepts */
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		/* Reinitialize so the new frame size takes effect */
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING)) {
				/*
				 * Only promisc/allmulti changed: adjust the
				 * RX filters without a full reinit.
				 */
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_flags & IFF_RUNNING)
				igb_stop(adapter); 
		/* Remember flags so the next delta can be computed */
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_flags & IFF_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			/* Leave interrupts masked while polling is on */
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
		/* FALLTHROUGH -- ifmedia_ioctl() handles both set and get */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		/* mask holds only the capability bits that changed */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
#ifdef NET_TSO 
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
#endif
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
#ifdef NET_LRO 
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
#endif
		/* Offload changes only take effect after a reinit */
		if (reinit && (ifp->if_flags & IFF_RUNNING))
			igb_init(adapter);
#if 0
		VLAN_CAPABILITIES(ifp);
#endif
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
1089
1090
1091 /*********************************************************************
1092  *  Init entry point
1093  *
1094  *  This routine is used in two ways. It is used by the stack as
1095  *  init entry point in network interface structure. It is also used
1096  *  by the driver as a hw/sw initialization routine to get to a
1097  *  consistent state.
1098  *
1099  *  return 0 on success, positive on failure
1100  **********************************************************************/
1101
/*
 * Bring hardware and software state to a consistent running
 * configuration.  IGB_CORE_LOCK must be held by the caller (asserted
 * below); interrupts are masked for the duration and re-enabled at the
 * end unless polling is active.
 */
static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	      ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		/* SCTP checksum offload is only present on the 82576 */
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

#ifdef NET_TSO
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;
#endif

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (ifp->if_mtu > ETHERMTU)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MCLBYTES;

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* Set up VLAN tag offload and filter */
	igb_setup_vlan_hw_support(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	/* Brace block pairs with the 'else' above when polling is built in */
	{
	igb_enable_intr(adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}
1202
/* Stack-facing init entry point: wraps igb_init_locked() in the core lock. */
static void
igb_init(void *arg)
{
	struct adapter *sc;

	sc = arg;
	IGB_CORE_LOCK(sc);
	igb_init_locked(sc);
	IGB_CORE_UNLOCK(sc);
}
1212
1213
/*
 * Taskqueue handler for the MSI/legacy deferred path (queued from
 * igb_irq_fast()).  Services RX then TX on the first rings, requeues
 * itself while igb_rxeof() reports more work, and finally re-enables
 * the interrupts that igb_irq_fast() masked.
 */
static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	if (ifp->if_flags & IFF_RUNNING) {
		/* More RX pending: reschedule ourselves */
		if (igb_rxeof(rxr, adapter->rx_process_limit))
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

		/* Restart transmission if frames are waiting */
#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!ifq_is_empty(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}
1242
/*
 * Per-queue deferred work handler (MSI-X path): drain RX and TX with a
 * bounded number of passes (IGB_MAX_LOOP), kick transmission, then
 * re-enable this queue's interrupt unless polling is enabled.
 */
static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	struct ifnet	*ifp = adapter->ifp;
	u32		loop = IGB_MAX_LOOP;
	bool		more;

	/* RX first */
	do {
		more = igb_rxeof(rxr, -1);
	} while (loop-- && more);

	/* TX is best-effort: skip if another context holds the ring */
	if (IGB_TX_TRYLOCK(txr)) {
		loop = IGB_MAX_LOOP;
		do {
			more = igb_txeof(txr);
		} while (loop-- && more);
#if __FreeBSD_version >= 800000
		igb_mq_start_locked(ifp, txr, NULL);
#else
		if (!ifq_is_empty(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	/* Reenable this interrupt */
#ifdef DEVICE_POLLING
	/* NB: the unbraced 'if' below guards only the EIMS write */
	if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
}
1279
1280 /* Deal with link in a sleepable context */
1281 static void
1282 igb_handle_link(void *context, int pending)
1283 {
1284         struct adapter *adapter = context;
1285
1286         adapter->hw.mac.get_link_status = 1;
1287         igb_update_link_status(adapter);
1288 }
1289
1290 /*********************************************************************
1291  *
1292  *  MSI/Legacy Deferred
1293  *  Interrupt Service routine  
1294  *
1295  *********************************************************************/
1296 #define FILTER_STRAY
1297 #define FILTER_HANDLED
/*
 * Fast interrupt handler for the MSI/legacy path.  Filters stray
 * interrupts, masks the device, and defers the real RX/TX and link
 * work to taskqueues.  FILTER_STRAY/FILTER_HANDLED are defined empty
 * just above, so the 'return FILTER_*' statements carry no value here.
 */
static void
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	uint32_t	reg_icr;


	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY; 

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(adapter->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}
1334
1335 #ifdef DEVICE_POLLING
1336 /*********************************************************************
1337  *
1338  *  Legacy polling routine  
1339  *
1340  *********************************************************************/
/*
 * Legacy DEVICE_POLLING entry point.  On FreeBSD >= 8 the poll handler
 * returns the rx-done count; older ABIs return void, hence the
 * POLL_RETURN_COUNT() dance.  Only the first RX/TX rings are serviced.
 */
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct tx_ring	*txr = adapter->tx_rings;
	u32		reg_icr, rx_done = 0;
	u32		loop = IGB_MAX_LOOP;
	bool		more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_flags & IFF_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			taskqueue_enqueue(adapter->tq, &adapter->link_task);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	/* TODO: rx_count */
	rx_done = igb_rxeof(rxr, count) ? 1 : 0;

	IGB_TX_LOCK(txr);
	/* TX cleanup is bounded by IGB_MAX_LOOP passes */
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	if (!ifq_is_empty(&ifp->if_snd))
		igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
1391 #endif /* DEVICE_POLLING */
1392
1393 /*********************************************************************
1394  *
1395  *  MSIX TX Interrupt Service routine
1396  *
1397  **********************************************************************/
/*
 * MSI-X per-queue interrupt handler.  Masks this queue's interrupt
 * (EIMC), cleans TX then RX once, optionally recomputes the adaptive
 * interrupt moderation (AIM) setting, and either schedules the queue
 * task (more work pending) or re-arms the interrupt (EIMS).
 */
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	/* Mask this queue's interrupt while we service it */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(rxr, adapter->rx_process_limit);

	if (igb_enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);
 
	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;
				
	/* Used half Default if sub-gig */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		/* newitr approximates the average frame size */
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		/*
		 * NOTE(review): this sets bit 27 (0x8000000); the 82576
		 * EITR counter-ignore flag is documented as bit 31
		 * (0x80000000) -- confirm against the datasheet.
		 */
		newitr |= 0x8000000;
		 
	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed*/
	if (more_tx || more_rx) 
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}
1477
1478
1479 /*********************************************************************
1480  *
1481  *  MSIX Link Interrupt Service routine
1482  *
1483  **********************************************************************/
1484
1485 static void
1486 igb_msix_link(void *arg)
1487 {
1488         struct adapter  *adapter = arg;
1489         u32             icr;
1490
1491         ++adapter->link_irq;
1492         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1493         if (!(icr & E1000_ICR_LSC))
1494                 goto spurious;
1495         taskqueue_enqueue(adapter->tq, &adapter->link_task);
1496
1497 spurious:
1498         /* Rearm */
1499         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1500         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1501         return;
1502 }
1503
1504
1505 /*********************************************************************
1506  *
1507  *  Media Ioctl callback
1508  *
1509  *  This routine is called whenever the user queries the status of
1510  *  the interface using ifconfig.
1511  *
1512  **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	/* Fiber and serdes links are both reported as 1000_SX */
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	/* No link: report valid-but-inactive and bail */
	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		/* Copper: map the negotiated speed/duplex to media bits */
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}
1556
1557 /*********************************************************************
1558  *
1559  *  Media Ioctl callback
1560  *
1561  *  This routine is called when the user changes speed/duplex using
1562  *  media/mediopt option with ifconfig.
1563  *
1564  **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit is always autonegotiated, full duplex only */
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		/* NB: falls out of the switch and still reinitializes */
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}
1618
1619
1620 /*********************************************************************
1621  *
1622  *  This routine maps the mbufs to Advanced TX descriptors.
1623  *  used by the 82575 adapter.
1624  *  
1625  **********************************************************************/
1626
1627 static int
1628 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1629 {
1630         struct adapter          *adapter = txr->adapter;
1631         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1632         bus_dmamap_t            map;
1633         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1634         union e1000_adv_tx_desc *txd = NULL;
1635         struct mbuf             *m_head;
1636         u32                     olinfo_status = 0, cmd_type_len = 0;
1637         int                     nsegs, i, j, error, first, last = 0;
1638         u32                     hdrlen = 0;
1639
1640         m_head = *m_headp;
1641
1642
1643         /* Set basic descriptor constants */
1644         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1645         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1646         if (m_head->m_flags & M_VLANTAG)
1647                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1648
1649         /*
1650          * Force a cleanup if number of TX descriptors
1651          * available hits the threshold
1652          */
1653         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1654                 igb_txeof(txr);
1655                 /* Now do we at least have a minimal? */
1656                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1657                         txr->no_desc_avail++;
1658                         return (ENOBUFS);
1659                 }
1660         }
1661
1662         /*
1663          * Map the packet for DMA.
1664          *
1665          * Capture the first descriptor index,
1666          * this descriptor will have the index
1667          * of the EOP which is the only one that
1668          * now gets a DONE bit writeback.
1669          */
1670         first = txr->next_avail_desc;
1671         tx_buffer = &txr->tx_buffers[first];
1672         tx_buffer_mapped = tx_buffer;
1673         map = tx_buffer->map;
1674
1675         error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1676             *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1677
1678         if (error == EFBIG) {
1679                 struct mbuf *m;
1680
1681                 m = m_defrag(*m_headp, MB_DONTWAIT);
1682                 if (m == NULL) {
1683                         adapter->mbuf_defrag_failed++;
1684                         m_freem(*m_headp);
1685                         *m_headp = NULL;
1686                         return (ENOBUFS);
1687                 }
1688                 *m_headp = m;
1689
1690                 /* Try it again */
1691                 error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1692                     *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1693
1694                 if (error == ENOMEM) {
1695                         adapter->no_tx_dma_setup++;
1696                         return (error);
1697                 } else if (error != 0) {
1698                         adapter->no_tx_dma_setup++;
1699                         m_freem(*m_headp);
1700                         *m_headp = NULL;
1701                         return (error);
1702                 }
1703         } else if (error == ENOMEM) {
1704                 adapter->no_tx_dma_setup++;
1705                 return (error);
1706         } else if (error != 0) {
1707                 adapter->no_tx_dma_setup++;
1708                 m_freem(*m_headp);
1709                 *m_headp = NULL;
1710                 return (error);
1711         }
1712
1713         /* Check again to be sure we have enough descriptors */
1714         if (nsegs > (txr->tx_avail - 2)) {
1715                 txr->no_desc_avail++;
1716                 bus_dmamap_unload(txr->txtag, map);
1717                 return (ENOBUFS);
1718         }
1719         m_head = *m_headp;
1720
1721         /*
1722          * Set up the context descriptor:
1723          * used when any hardware offload is done.
1724          * This includes CSUM, VLAN, and TSO. It
1725          * will use the first descriptor.
1726          */
1727 #ifdef NET_TSO
1728         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1729                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1730                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1731                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1732                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1733                 } else
1734                         return (ENXIO); 
1735         } else
1736 #endif
1737                if (igb_tx_ctx_setup(txr, m_head))
1738                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1739
1740         /* Calculate payload length */
1741         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1742             << E1000_ADVTXD_PAYLEN_SHIFT);
1743
1744         /* 82575 needs the queue index added */
1745         if (adapter->hw.mac.type == e1000_82575)
1746                 olinfo_status |= txr->me << 4;
1747
1748         /* Set up our transmit descriptors */
1749         i = txr->next_avail_desc;
1750         for (j = 0; j < nsegs; j++) {
1751                 bus_size_t seg_len;
1752                 bus_addr_t seg_addr;
1753
1754                 tx_buffer = &txr->tx_buffers[i];
1755                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1756                 seg_addr = segs[j].ds_addr;
1757                 seg_len  = segs[j].ds_len;
1758
1759                 txd->read.buffer_addr = htole64(seg_addr);
1760                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1761                 txd->read.olinfo_status = htole32(olinfo_status);
1762                 last = i;
1763                 if (++i == adapter->num_tx_desc)
1764                         i = 0;
1765                 tx_buffer->m_head = NULL;
1766                 tx_buffer->next_eop = -1;
1767         }
1768
1769         txr->next_avail_desc = i;
1770         txr->tx_avail -= nsegs;
1771
1772         tx_buffer->m_head = m_head;
1773         tx_buffer_mapped->map = tx_buffer->map;
1774         tx_buffer->map = map;
1775         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1776
1777         /*
1778          * Last Descriptor of Packet
1779          * needs End Of Packet (EOP)
1780          * and Report Status (RS)
1781          */
1782         txd->read.cmd_type_len |=
1783             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1784         /*
1785          * Keep track in the first buffer which
1786          * descriptor will be written back
1787          */
1788         tx_buffer = &txr->tx_buffers[first];
1789         tx_buffer->next_eop = last;
1790         txr->watchdog_time = ticks;
1791
1792         /*
1793          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1794          * that this frame is available to transmit.
1795          */
1796         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1797             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1798         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1799         ++txr->tx_packets;
1800
1801         return (0);
1802
1803 }
1804
1805 static void
1806 igb_set_promisc(struct adapter *adapter)
1807 {
1808         struct ifnet    *ifp = adapter->ifp;
1809         uint32_t        reg_rctl;
1810
1811         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1812
1813         if (ifp->if_flags & IFF_PROMISC) {
1814                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1815                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1816         } else if (ifp->if_flags & IFF_ALLMULTI) {
1817                 reg_rctl |= E1000_RCTL_MPE;
1818                 reg_rctl &= ~E1000_RCTL_UPE;
1819                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1820         }
1821 }
1822
1823 static void
1824 igb_disable_promisc(struct adapter *adapter)
1825 {
1826         uint32_t        reg_rctl;
1827
1828         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1829
1830         reg_rctl &=  (~E1000_RCTL_UPE);
1831         reg_rctl &=  (~E1000_RCTL_MPE);
1832         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1833 }
1834
1835
1836 /*********************************************************************
1837  *  Multicast Update
1838  *
1839  *  This routine is called whenever multicast address list is updated.
1840  *
1841  **********************************************************************/
1842
1843 static void
1844 igb_set_multi(struct adapter *adapter)
1845 {
1846         struct ifnet    *ifp = adapter->ifp;
1847         struct ifmultiaddr *ifma;
1848         u32 reg_rctl = 0;
1849         u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1850
1851         int mcnt = 0;
1852
1853         IOCTL_DEBUGOUT("igb_set_multi: begin");
1854
1855 #if 0
1856 #if __FreeBSD_version < 800000
1857         IF_ADDR_LOCK(ifp);
1858 #else
1859         if_maddr_rlock(ifp);
1860 #endif
1861 #endif
1862
1863         LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1864                 if (ifma->ifma_addr->sa_family != AF_LINK)
1865                         continue;
1866
1867                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1868                         break;
1869
1870                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1871                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1872                 mcnt++;
1873         }
1874 #if 0
1875 #if __FreeBSD_version < 800000
1876         IF_ADDR_UNLOCK(ifp);
1877 #else
1878         if_maddr_runlock(ifp);
1879 #endif
1880 #endif
1881
1882         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1883                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1884                 reg_rctl |= E1000_RCTL_MPE;
1885                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1886         } else
1887                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1888 }
1889
1890
1891 /*********************************************************************
1892  *  Timer routine:
1893  *      This routine checks for link status,
1894  *      updates statistics, and does the watchdog.
1895  *
1896  **********************************************************************/
1897
1898 static void
1899 igb_local_timer(void *arg)
1900 {
1901         struct adapter          *adapter = arg;
1902
1903         IGB_CORE_LOCK(adapter);
1904
1905         struct ifnet            *ifp = adapter->ifp;
1906         device_t                dev = adapter->dev;
1907         struct tx_ring          *txr = adapter->tx_rings;
1908
1909
1910         IGB_CORE_LOCK_ASSERT(adapter);
1911
1912         igb_update_link_status(adapter);
1913         igb_update_stats_counters(adapter);
1914
1915         if (igb_display_debug_stats && ifp->if_flags & IFF_RUNNING)
1916                 igb_print_hw_stats(adapter);
1917
1918         /*
1919         ** Watchdog: check for time since any descriptor was cleaned
1920         */
1921         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1922                 if (txr->watchdog_check == FALSE)
1923                         continue;
1924                 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1925                         goto timeout;
1926         }
1927
1928         /* Trigger an RX interrupt on all queues */
1929 #ifdef DEVICE_POLLING
1930         if (!(ifp->if_capenable & IFCAP_POLLING))
1931 #endif
1932         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1933         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1934         IGB_CORE_UNLOCK(adapter);
1935         return;
1936
1937 timeout:
1938         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1939         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1940             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1941             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1942         device_printf(dev,"TX(%d) desc avail = %d,"
1943             "Next TX to Clean = %d\n",
1944             txr->me, txr->tx_avail, txr->next_to_clean);
1945         adapter->ifp->if_flags &= ~IFF_RUNNING;
1946         adapter->watchdog_events++;
1947         igb_init_locked(adapter);
1948         IGB_CORE_UNLOCK(adapter);
1949 }
1950
1951 static void
1952 igb_update_link_status(struct adapter *adapter)
1953 {
1954         struct e1000_hw *hw = &adapter->hw;
1955         struct ifnet *ifp = adapter->ifp;
1956         device_t dev = adapter->dev;
1957         struct tx_ring *txr = adapter->tx_rings;
1958         u32 link_check = 0;
1959
1960         /* Get the cached link value or read for real */
1961         switch (hw->phy.media_type) {
1962         case e1000_media_type_copper:
1963                 if (hw->mac.get_link_status) {
1964                         /* Do the work to read phy */
1965                         e1000_check_for_link(hw);
1966                         link_check = !hw->mac.get_link_status;
1967                 } else
1968                         link_check = TRUE;
1969                 break;
1970         case e1000_media_type_fiber:
1971                 e1000_check_for_link(hw);
1972                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1973                                  E1000_STATUS_LU);
1974                 break;
1975         case e1000_media_type_internal_serdes:
1976                 e1000_check_for_link(hw);
1977                 link_check = adapter->hw.mac.serdes_has_link;
1978                 break;
1979         default:
1980         case e1000_media_type_unknown:
1981                 break;
1982         }
1983
1984         /* Now we check if a transition has happened */
1985         if (link_check && (adapter->link_active == 0)) {
1986                 e1000_get_speed_and_duplex(&adapter->hw, 
1987                     &adapter->link_speed, &adapter->link_duplex);
1988                 if (bootverbose)
1989                         device_printf(dev, "Link is up %d Mbps %s\n",
1990                             adapter->link_speed,
1991                             ((adapter->link_duplex == FULL_DUPLEX) ?
1992                             "Full Duplex" : "Half Duplex"));
1993                 adapter->link_active = 1;
1994                 ifp->if_baudrate = adapter->link_speed * 1000000;
1995                 ifp->if_link_state = LINK_STATE_UP;
1996                 if_link_state_change(ifp);
1997         } else if (!link_check && (adapter->link_active == 1)) {
1998                 ifp->if_baudrate = adapter->link_speed = 0;
1999                 adapter->link_duplex = 0;
2000                 if (bootverbose)
2001                         device_printf(dev, "Link is Down\n");
2002                 adapter->link_active = 0;
2003                 ifp->if_link_state = LINK_STATE_DOWN;
2004                 if_link_state_change(ifp);
2005                 /* Turn off watchdogs */
2006                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2007                         txr->watchdog_check = FALSE;
2008         }
2009 }
2010
2011 /*********************************************************************
2012  *
2013  *  This routine disables all traffic on the adapter by issuing a
2014  *  global reset on the MAC and deallocates TX/RX buffers.
2015  *
2016  **********************************************************************/
2017
2018 static void
2019 igb_stop(void *arg)
2020 {
2021         struct adapter  *adapter = arg;
2022         struct ifnet    *ifp = adapter->ifp;
2023         struct tx_ring *txr = adapter->tx_rings;
2024
2025         IGB_CORE_LOCK_ASSERT(adapter);
2026
2027         INIT_DEBUGOUT("igb_stop: begin");
2028
2029         igb_disable_intr(adapter);
2030
2031         callout_stop(&adapter->timer);
2032
2033         /* Tell the stack that the interface is no longer active */
2034         ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2035
2036         /* Unarm watchdog timer. */
2037         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2038                 IGB_TX_LOCK(txr);
2039                 txr->watchdog_check = FALSE;
2040                 IGB_TX_UNLOCK(txr);
2041         }
2042
2043         e1000_reset_hw(&adapter->hw);
2044         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2045 }
2046
2047
2048 /*********************************************************************
2049  *
2050  *  Determine hardware revision.
2051  *
2052  **********************************************************************/
2053 static void
2054 igb_identify_hardware(struct adapter *adapter)
2055 {
2056         device_t dev = adapter->dev;
2057
2058         /* Make sure our PCI config space has the necessary stuff set */
2059         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2060         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2061             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2062                 device_printf(dev, "Memory Access and/or Bus Master bits "
2063                     "were not set!\n");
2064                 adapter->hw.bus.pci_cmd_word |=
2065                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2066                 pci_write_config(dev, PCIR_COMMAND,
2067                     adapter->hw.bus.pci_cmd_word, 2);
2068         }
2069
2070         /* Save off the information about this board */
2071         adapter->hw.vendor_id = pci_get_vendor(dev);
2072         adapter->hw.device_id = pci_get_device(dev);
2073         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2074         adapter->hw.subsystem_vendor_id =
2075             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2076         adapter->hw.subsystem_device_id =
2077             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2078
2079         /* Do Shared Code Init and Setup */
2080         if (e1000_set_mac_type(&adapter->hw)) {
2081                 device_printf(dev, "Setup init failure\n");
2082                 return;
2083         }
2084 }
2085
2086 static int
2087 igb_allocate_pci_resources(struct adapter *adapter)
2088 {
2089         device_t        dev = adapter->dev;
2090         int             rid;
2091
2092         rid = PCIR_BAR(0);
2093         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2094             &rid, RF_ACTIVE);
2095         if (adapter->pci_mem == NULL) {
2096                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2097                 return (ENXIO);
2098         }
2099         adapter->osdep.mem_bus_space_tag =
2100             rman_get_bustag(adapter->pci_mem);
2101         adapter->osdep.mem_bus_space_handle =
2102             rman_get_bushandle(adapter->pci_mem);
2103         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2104
2105         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2106
2107         /* This will setup either MSI/X or MSI */
2108         adapter->msix = igb_setup_msix(adapter);
2109         adapter->hw.back = &adapter->osdep;
2110
2111         return (0);
2112 }
2113
2114 /*********************************************************************
2115  *
2116  *  Setup the Legacy or MSI Interrupt handler
2117  *
2118  **********************************************************************/
/*
 * Set up a Legacy (INTx) or plain-MSI interrupt: allocate the single
 * IRQ resource, create the deferred-processing taskqueue with its
 * rx/tx and link tasks, and install the fast interrupt handler.
 * Returns 0 on success or an errno value.
 */
static int
igb_allocate_legacy(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int error, rid = 0;

	/* Turn off all interrupts */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

	/* MSI RID is 1 */
	if (adapter->msix == 1)
		rid = 1;

	/* We allocate a single interrupt resource */
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.
	 */
	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
	/* Make tasklet for deferred link handling */
	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
	adapter->tq = taskqueue_create("igb_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s taskq",
	    device_get_nameunit(adapter->dev));
	/*
	 * NOTE(review): on failure adapter->res stays allocated here;
	 * presumably the attach failure path releases it via
	 * igb_free_pci_resources() -- confirm against the caller.
	 */
	if ((error = bus_setup_intr(dev, adapter->res,
	    /*INTR_TYPE_NET |*/ INTR_MPSAFE, igb_irq_fast,
	    adapter, &adapter->tag, NULL)) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
			    "handler: %d\n", error);
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
		return (error);
	}

	return (0);
}
2164
2165
2166 /*********************************************************************
2167  *
2168  *  Setup the MSIX Queue Interrupt handlers: 
2169  *
2170  **********************************************************************/
2171 static int
2172 igb_allocate_msix(struct adapter *adapter)
2173 {
2174         device_t                dev = adapter->dev;
2175         struct igb_queue        *que = adapter->queues;
2176         int                     error, rid, vector = 0;
2177
2178
2179         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2180                 rid = vector + 1;
2181                 que->res = bus_alloc_resource_any(dev,
2182                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2183                 if (que->res == NULL) {
2184                         device_printf(dev,
2185                             "Unable to allocate bus resource: "
2186                             "MSIX Queue Interrupt\n");
2187                         return (ENXIO);
2188                 }
2189                 error = bus_setup_intr(dev, que->res,
2190                     /*INTR_TYPE_NET |*/ INTR_MPSAFE, 
2191                     igb_msix_que, que, &que->tag, NULL);
2192                 if (error) {
2193                         que->res = NULL;
2194                         device_printf(dev, "Failed to register Queue handler");
2195                         return (error);
2196                 }
2197                 que->msix = vector;
2198                 if (adapter->hw.mac.type == e1000_82575)
2199                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2200                 else
2201                         que->eims = 1 << vector;
2202                 /*
2203                 ** Bind the msix vector, and thus the
2204                 ** rings to the corresponding cpu.
2205                 */
2206 #if 0
2207                 if (adapter->num_queues > 1)
2208                         bus_bind_intr(dev, que->res, i);
2209 #endif
2210                 /* Make tasklet for deferred handling */
2211                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2212                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2213                     taskqueue_thread_enqueue, &que->tq);
2214                 taskqueue_start_threads(&que->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s que",
2215                     device_get_nameunit(adapter->dev));
2216         }
2217
2218         /* And Link */
2219         rid = vector + 1;
2220         adapter->res = bus_alloc_resource_any(dev,
2221             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2222         if (adapter->res == NULL) {
2223                 device_printf(dev,
2224                     "Unable to allocate bus resource: "
2225                     "MSIX Link Interrupt\n");
2226                 return (ENXIO);
2227         }
2228         if ((error = bus_setup_intr(dev, adapter->res,
2229             /*INTR_TYPE_NET |*/ INTR_MPSAFE,
2230             igb_msix_link, adapter, &adapter->tag, NULL)) != 0) {
2231                 device_printf(dev, "Failed to register Link handler");
2232                 return (error);
2233         }
2234         adapter->linkvec = vector;
2235
2236         /* Make tasklet for deferred handling */
2237         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2238         adapter->tq = taskqueue_create("igb_link", M_NOWAIT,
2239             taskqueue_thread_enqueue, &adapter->tq);
2240         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s link",
2241             device_get_nameunit(adapter->dev));
2242
2243         return (0);
2244 }
2245
2246
/*
 * Program the per-MAC interrupt vector mapping (IVAR registers /
 * MSIXBM bitmaps) so each queue's MSIX vector and the link vector
 * fire independently, then set the initial interrupt throttle rate
 * (EITR) for every queue vector.  The IVAR layout differs per MAC
 * generation, hence the per-type switch below.
 */
static void
igb_configure_queues(struct adapter *adapter)
{
	struct	e1000_hw	*hw = &adapter->hw;
	struct	igb_queue	*que;
	u32			tmp, ivar = 0;
	u32			newitr = IGB_DEFAULT_ITR;

	/* First turn on RSS capability */
	if (adapter->hw.mac.type > e1000_82575)
		E1000_WRITE_REG(hw, E1000_GPIE,
		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
		    E1000_GPIE_PBA | E1000_GPIE_NSICR);

	/* Turn on MSIX */
	switch (adapter->hw.mac.type) {
	case e1000_82580:
		/*
		 * 82580: each 32-bit IVAR register holds the RX and TX
		 * entries for two queues (one byte per entry); odd
		 * queues use the upper half.
		 */
		/* RX entries */
		for (int i = 0; i < adapter->num_queues; i++) {
			u32 index = i >> 1;
			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
			que = &adapter->queues[i];
			if (i & 1) {
				ivar &= 0xFF00FFFF;
				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
			} else {
				ivar &= 0xFFFFFF00;
				ivar |= que->msix | E1000_IVAR_VALID;
			}
			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
		}
		/* TX entries */
		for (int i = 0; i < adapter->num_queues; i++) {
			u32 index = i >> 1;
			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
			que = &adapter->queues[i];
			if (i & 1) {
				ivar &= 0x00FFFFFF;
				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
			} else {
				ivar &= 0xFFFF00FF;
				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
			}
			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
			adapter->eims_mask |= que->eims;
		}

		/* And for the link interrupt */
		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
		adapter->link_mask = 1 << adapter->linkvec;
		adapter->eims_mask |= adapter->link_mask;
		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
		break;
	case e1000_82576:
		/*
		 * 82576: eight IVAR registers, queue i uses register
		 * (i & 7); queues 8-15 use the upper halves.
		 */
		/* RX entries */
		for (int i = 0; i < adapter->num_queues; i++) {
			u32 index = i & 0x7; /* Each IVAR has two entries */
			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
			que = &adapter->queues[i];
			if (i < 8) {
				ivar &= 0xFFFFFF00;
				ivar |= que->msix | E1000_IVAR_VALID;
			} else {
				ivar &= 0xFF00FFFF;
				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
			}
			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
			adapter->eims_mask |= que->eims;
		}
		/* TX entries */
		for (int i = 0; i < adapter->num_queues; i++) {
			u32 index = i & 0x7; /* Each IVAR has two entries */
			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
			que = &adapter->queues[i];
			if (i < 8) {
				ivar &= 0xFFFF00FF;
				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
			} else {
				ivar &= 0x00FFFFFF;
				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
			}
			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
			adapter->eims_mask |= que->eims;
		}

		/* And for the link interrupt */
		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
		adapter->link_mask = 1 << adapter->linkvec;
		adapter->eims_mask |= adapter->link_mask;
		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
		break;

	case e1000_82575:
		/*
		 * 82575 has no IVAR table; vectors are mapped through
		 * the MSIXBM bitmap registers instead.
		 */
		/* enable MSI-X support*/
		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;
		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);

		/* Queues */
		for (int i = 0; i < adapter->num_queues; i++) {
			que = &adapter->queues[i];
			tmp = E1000_EICR_RX_QUEUE0 << i;
			tmp |= E1000_EICR_TX_QUEUE0 << i;
			que->eims = tmp;
			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
			    i, que->eims);
			adapter->eims_mask |= que->eims;
		}

		/* Link */
		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
		    E1000_EIMS_OTHER);
		adapter->link_mask |= E1000_EIMS_OTHER;
		adapter->eims_mask |= adapter->link_mask;
	default:
		/* NOTE: e1000_82575 above intentionally falls into default */
		break;
	}

	/* Set the starting interrupt rate */
	if (hw->mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= 0x8000000;

	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
	}

	return;
}
2381
2382
2383 static void
2384 igb_free_pci_resources(struct adapter *adapter)
2385 {
2386         struct          igb_queue *que = adapter->queues;
2387         device_t        dev = adapter->dev;
2388         int             rid;
2389
2390         /*
2391         ** There is a slight possibility of a failure mode
2392         ** in attach that will result in entering this function
2393         ** before interrupt resources have been initialized, and
2394         ** in that case we do not want to execute the loops below
2395         ** We can detect this reliably by the state of the adapter
2396         ** res pointer.
2397         */
2398         if (adapter->res == NULL)
2399                 goto mem;
2400
2401         /*
2402          * First release all the interrupt resources:
2403          */
2404         for (int i = 0; i < adapter->num_queues; i++, que++) {
2405                 rid = que->msix + 1;
2406                 if (que->tag != NULL) {
2407                         bus_teardown_intr(dev, que->res, que->tag);
2408                         que->tag = NULL;
2409                 }
2410                 if (que->res != NULL)
2411                         bus_release_resource(dev,
2412                             SYS_RES_IRQ, rid, que->res);
2413         }
2414
2415         /* Clean the Legacy or Link interrupt last */
2416         if (adapter->linkvec) /* we are doing MSIX */
2417                 rid = adapter->linkvec + 1;
2418         else
2419                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2420
2421         if (adapter->tag != NULL) {
2422                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2423                 adapter->tag = NULL;
2424         }
2425         if (adapter->res != NULL)
2426                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2427
2428 mem:
2429         if (adapter->msix)
2430                 pci_release_msi(dev);
2431
2432         if (adapter->msix_mem != NULL)
2433                 bus_release_resource(dev, SYS_RES_MEMORY,
2434                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2435
2436         if (adapter->pci_mem != NULL)
2437                 bus_release_resource(dev, SYS_RES_MEMORY,
2438                     PCIR_BAR(0), adapter->pci_mem);
2439
2440 }
2441
2442 /*
2443  * Setup Either MSI/X or MSI
2444  */
2445 static int
2446 igb_setup_msix(struct adapter *adapter)
2447 {
2448         device_t dev = adapter->dev;
2449         int rid, want, queues, msgs;
2450
2451         /* tuneable override */
2452         if (igb_enable_msix == 0)
2453                 goto msi;
2454
2455         /* First try MSI/X */
2456         rid = PCIR_BAR(IGB_MSIX_BAR);
2457         adapter->msix_mem = bus_alloc_resource_any(dev,
2458             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2459         if (!adapter->msix_mem) {
2460                 /* May not be enabled */
2461                 device_printf(adapter->dev,
2462                     "Unable to map MSIX table \n");
2463                 goto msi;
2464         }
2465
2466         msgs = pci_msix_count(dev); 
2467         if (msgs == 0) { /* system has msix disabled */
2468                 bus_release_resource(dev, SYS_RES_MEMORY,
2469                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2470                 adapter->msix_mem = NULL;
2471                 goto msi;
2472         }
2473
2474         /* Figure out a reasonable auto config value */
2475         queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2476
2477         /* Can have max of 4 queues on 82575 */
2478         if (adapter->hw.mac.type == e1000_82575) {
2479                 if (queues > 4)
2480                         queues = 4;
2481                 if (igb_num_queues > 4)
2482                         igb_num_queues = 4;
2483         }
2484
2485         if (igb_num_queues == 0)
2486                 igb_num_queues = queues;
2487
2488         /*
2489         ** One vector (RX/TX pair) per queue
2490         ** plus an additional for Link interrupt
2491         */
2492         want = igb_num_queues + 1;
2493         if (msgs >= want)
2494                 msgs = want;
2495         else {
2496                 device_printf(adapter->dev,
2497                     "MSIX Configuration Problem, "
2498                     "%d vectors configured, but %d queues wanted!\n",
2499                     msgs, want);
2500                 return (ENXIO);
2501         }
2502         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2503                 device_printf(adapter->dev,
2504                     "Using MSIX interrupts with %d vectors\n", msgs);
2505                 adapter->num_queues = igb_num_queues;
2506                 return (msgs);
2507         }
2508 msi:
2509         msgs = pci_msi_count(dev);
2510         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2511                 device_printf(adapter->dev,"Using MSI interrupt\n");
2512         return (msgs);
2513 }
2514
2515 /*********************************************************************
2516  *
2517  *  Set up an fresh starting state
2518  *
2519  **********************************************************************/
2520 static void
2521 igb_reset(struct adapter *adapter)
2522 {
2523         device_t        dev = adapter->dev;
2524         struct e1000_hw *hw = &adapter->hw;
2525         struct e1000_fc_info *fc = &hw->fc;
2526         struct ifnet    *ifp = adapter->ifp;
2527         u32             pba = 0;
2528         u16             hwm;
2529
2530         INIT_DEBUGOUT("igb_reset: begin");
2531
2532         /* Let the firmware know the OS is in control */
2533         igb_get_hw_control(adapter);
2534
2535         /*
2536          * Packet Buffer Allocation (PBA)
2537          * Writing PBA sets the receive portion of the buffer
2538          * the remainder is used for the transmit buffer.
2539          */
2540         switch (hw->mac.type) {
2541         case e1000_82575:
2542                 pba = E1000_PBA_32K;
2543                 break;
2544         case e1000_82576:
2545                 pba = E1000_PBA_64K;
2546                 break;
2547         case e1000_82580:
2548                 pba = E1000_PBA_35K;
2549         default:
2550                 break;
2551         }
2552
2553         /* Special needs in case of Jumbo frames */
2554         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2555                 u32 tx_space, min_tx, min_rx;
2556                 pba = E1000_READ_REG(hw, E1000_PBA);
2557                 tx_space = pba >> 16;
2558                 pba &= 0xffff;
2559                 min_tx = (adapter->max_frame_size +
2560                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2561                 min_tx = roundup2(min_tx, 1024);
2562                 min_tx >>= 10;
2563                 min_rx = adapter->max_frame_size;
2564                 min_rx = roundup2(min_rx, 1024);
2565                 min_rx >>= 10;
2566                 if (tx_space < min_tx &&
2567                     ((min_tx - tx_space) < pba)) {
2568                         pba = pba - (min_tx - tx_space);
2569                         /*
2570                          * if short on rx space, rx wins
2571                          * and must trump tx adjustment
2572                          */
2573                         if (pba < min_rx)
2574                                 pba = min_rx;
2575                 }
2576                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2577         }
2578
2579         INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2580
2581         /*
2582          * These parameters control the automatic generation (Tx) and
2583          * response (Rx) to Ethernet PAUSE frames.
2584          * - High water mark should allow for at least two frames to be
2585          *   received after sending an XOFF.
2586          * - Low water mark works best when it is very near the high water mark.
2587          *   This allows the receiver to restart by sending XON when it has
2588          *   drained a bit.
2589          */
2590         hwm = min(((pba << 10) * 9 / 10),
2591             ((pba << 10) - 2 * adapter->max_frame_size));
2592
2593         if (hw->mac.type < e1000_82576) {
2594                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2595                 fc->low_water = fc->high_water - 8;
2596         } else {
2597                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2598                 fc->low_water = fc->high_water - 16;
2599         }
2600
2601         fc->pause_time = IGB_FC_PAUSE_TIME;
2602         fc->send_xon = TRUE;
2603
2604         /* Set Flow control, use the tunable location if sane */
2605         if ((igb_fc_setting >= 0) || (igb_fc_setting < 4))
2606                 fc->requested_mode = igb_fc_setting;
2607         else
2608                 fc->requested_mode = e1000_fc_none;
2609
2610         fc->current_mode = fc->requested_mode;
2611
2612         /* Issue a global reset */
2613         e1000_reset_hw(hw);
2614         E1000_WRITE_REG(hw, E1000_WUC, 0);
2615
2616         if (e1000_init_hw(hw) < 0)
2617                 device_printf(dev, "Hardware Initialization Failed\n");
2618
2619         if (hw->mac.type == e1000_82580) {
2620                 u32 reg;
2621
2622                 hwm = (pba << 10) - (2 * adapter->max_frame_size);
2623                 /*
2624                  * 0x80000000 - enable DMA COAL
2625                  * 0x10000000 - use L0s as low power
2626                  * 0x20000000 - use L1 as low power
2627                  * X << 16 - exit dma coal when rx data exceeds X kB
2628                  * Y - upper limit to stay in dma coal in units of 32usecs
2629                  */
2630                 E1000_WRITE_REG(hw, E1000_DMACR,
2631                     0xA0000006 | ((hwm << 6) & 0x00FF0000));
2632
2633                 /* set hwm to PBA -  2 * max frame size */
2634                 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2635                 /*
2636                  * This sets the time to wait before requesting transition to
2637                  * low power state to number of usecs needed to receive 1 512
2638                  * byte frame at gigabit line rate
2639                  */
2640                 E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2641
2642                 /* free space in tx packet buffer to wake from DMA coal */
2643                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2644                     (20480 - (2 * adapter->max_frame_size)) >> 6);
2645
2646                 /* make low power state decision controlled by DMA coal */
2647                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2648                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2649                     reg | E1000_PCIEMISC_LX_DECISION);
2650         }
2651
2652         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2653         e1000_get_phy_info(hw);
2654         e1000_check_for_link(hw);
2655         return;
2656 }
2657
2658 /*********************************************************************
2659  *
2660  *  Setup networking device structure and register an interface.
2661  *
2662  **********************************************************************/
2663 static void
2664 igb_setup_interface(device_t dev, struct adapter *adapter)
2665 {
2666         struct ifnet   *ifp;
2667
2668         INIT_DEBUGOUT("igb_setup_interface: begin");
2669
2670         ifp = adapter->ifp = &adapter->arpcom.ac_if;
2671         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2672         ifp->if_mtu = ETHERMTU;
2673         ifp->if_init =  igb_init;
2674         ifp->if_softc = adapter;
2675         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2676         ifp->if_ioctl = igb_ioctl;
2677         ifp->if_start = igb_start;
2678 #if __FreeBSD_version >= 800000
2679         ifp->if_transmit = igb_mq_start;
2680         ifp->if_qflush = igb_qflush;
2681 #endif
2682         ifq_set_maxlen(&ifp->if_snd, adapter->num_tx_desc - 1);
2683         ifq_set_ready(&ifp->if_snd);
2684
2685         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2686
2687         ifp->if_capabilities = ifp->if_capenable = 0;
2688
2689         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2690 #ifdef NET_TSO
2691         ifp->if_capabilities |= IFCAP_TSO4;
2692 #endif
2693         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2694 #ifdef NET_LRO
2695         if (igb_header_split)
2696                 ifp->if_capabilities |= IFCAP_LRO;
2697 #endif
2698
2699         ifp->if_capenable = ifp->if_capabilities;
2700 #ifdef DEVICE_POLLING
2701         ifp->if_capabilities |= IFCAP_POLLING;
2702 #endif
2703
2704         /*
2705          * Tell the upper layer(s) we support long frames.
2706          */
2707         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2708         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2709         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2710
2711         /*
2712          * Specify the media types supported by this adapter and register
2713          * callbacks to update media and link information
2714          */
2715         ifmedia_init(&adapter->media, IFM_IMASK,
2716             igb_media_change, igb_media_status);
2717         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2718             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2719                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2720                             0, NULL);
2721                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2722         } else {
2723                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2724                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2725                             0, NULL);
2726                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2727                             0, NULL);
2728                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2729                             0, NULL);
2730                 if (adapter->hw.phy.type != e1000_phy_ife) {
2731                         ifmedia_add(&adapter->media,
2732                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2733                         ifmedia_add(&adapter->media,
2734                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2735                 }
2736         }
2737         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2738         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2739 }
2740
2741
2742 /*
2743  * Manage DMA'able memory.
2744  */
2745 static void
2746 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2747 {
2748         if (error)
2749                 return;
2750         *(bus_addr_t *) arg = segs[0].ds_addr;
2751 }
2752
2753 static int
2754 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2755         struct igb_dma_alloc *dma, int mapflags)
2756 {
2757         int error;
2758
2759         error = bus_dma_tag_create(NULL,                /* parent */
2760                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2761                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2762                                 BUS_SPACE_MAXADDR,      /* highaddr */
2763                                 NULL, NULL,             /* filter, filterarg */
2764                                 size,                   /* maxsize */
2765                                 1,                      /* nsegments */
2766                                 size,                   /* maxsegsize */
2767                                 0,                      /* flags */
2768                                 &dma->dma_tag);
2769         if (error) {
2770                 device_printf(adapter->dev,
2771                     "%s: bus_dma_tag_create failed: %d\n",
2772                     __func__, error);
2773                 goto fail_0;
2774         }
2775
2776         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2777             BUS_DMA_NOWAIT, &dma->dma_map);
2778         if (error) {
2779                 device_printf(adapter->dev,
2780                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2781                     __func__, (uintmax_t)size, error);
2782                 goto fail_2;
2783         }
2784
2785         dma->dma_paddr = 0;
2786         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2787             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2788         if (error || dma->dma_paddr == 0) {
2789                 device_printf(adapter->dev,
2790                     "%s: bus_dmamap_load failed: %d\n",
2791                     __func__, error);
2792                 goto fail_3;
2793         }
2794
2795         return (0);
2796
2797 fail_3:
2798         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2799 fail_2:
2800         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2801         bus_dma_tag_destroy(dma->dma_tag);
2802 fail_0:
2803         dma->dma_map = NULL;
2804         dma->dma_tag = NULL;
2805
2806         return (error);
2807 }
2808
2809 static void
2810 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2811 {
2812         if (dma->dma_tag == NULL)
2813                 return;
2814         if (dma->dma_map != NULL) {
2815                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2816                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2817                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2818                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2819                 dma->dma_map = NULL;
2820         }
2821         bus_dma_tag_destroy(dma->dma_tag);
2822         dma->dma_tag = NULL;
2823 }
2824
2825
2826 /*********************************************************************
2827  *
2828  *  Allocate memory for the transmit and receive rings, and then
2829  *  the descriptors associated with each, called only once at attach.
2830  *
2831  **********************************************************************/
2832 static int
2833 igb_allocate_queues(struct adapter *adapter)
2834 {
2835         device_t dev = adapter->dev;
2836         struct igb_queue        *que = NULL;
2837         struct tx_ring          *txr = NULL;
2838         struct rx_ring          *rxr = NULL;
2839         int rsize, tsize, error = E1000_SUCCESS;
2840         int txconf = 0, rxconf = 0;
2841
2842         /* First allocate the top level queue structs */
2843         if (!(adapter->queues =
2844             (struct igb_queue *) kmalloc(sizeof(struct igb_queue) *
2845             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2846                 device_printf(dev, "Unable to allocate queue memory\n");
2847                 error = ENOMEM;
2848                 goto fail;
2849         }
2850
2851         /* Next allocate the TX ring struct memory */
2852         if (!(adapter->tx_rings =
2853             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2854             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2855                 device_printf(dev, "Unable to allocate TX ring memory\n");
2856                 error = ENOMEM;
2857                 goto tx_fail;
2858         }
2859
2860         /* Now allocate the RX */
2861         if (!(adapter->rx_rings =
2862             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2863             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2864                 device_printf(dev, "Unable to allocate RX ring memory\n");
2865                 error = ENOMEM;
2866                 goto rx_fail;
2867         }
2868
2869         tsize = roundup2(adapter->num_tx_desc *
2870             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2871         /*
2872          * Now set up the TX queues, txconf is needed to handle the
2873          * possibility that things fail midcourse and we need to
2874          * undo memory gracefully
2875          */ 
2876         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2877                 /* Set up some basics */
2878                 txr = &adapter->tx_rings[i];
2879                 txr->adapter = adapter;
2880                 txr->me = i;
2881
2882                 /* Initialize the TX lock */
2883                 ksnprintf(txr->spin_name, sizeof(txr->spin_name), "%s:tx(%d)",
2884                     device_get_nameunit(dev), txr->me);
2885
2886                 spin_init(&txr->tx_spin);
2887
2888                 if (igb_dma_malloc(adapter, tsize,
2889                         &txr->txdma, BUS_DMA_NOWAIT)) {
2890                         device_printf(dev,
2891                             "Unable to allocate TX Descriptor memory\n");
2892                         error = ENOMEM;
2893                         goto err_tx_desc;
2894                 }
2895                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2896                 bzero((void *)txr->tx_base, tsize);
2897
2898                 /* Now allocate transmit buffers for the ring */
2899                 if (igb_allocate_transmit_buffers(txr)) {
2900                         device_printf(dev,
2901                             "Critical Failure setting up transmit buffers\n");
2902                         error = ENOMEM;
2903                         goto err_tx_desc;
2904                 }
2905 #if __FreeBSD_version >= 800000
2906                 /* Allocate a buf ring */
2907                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2908                     M_WAITOK, &txr->tx_mtx);
2909 #endif
2910         }
2911
2912         /*
2913          * Next the RX queues...
2914          */ 
2915         rsize = roundup2(adapter->num_rx_desc *
2916             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2917         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2918                 rxr = &adapter->rx_rings[i];
2919                 rxr->adapter = adapter;
2920                 rxr->me = i;
2921
2922                 /* Initialize the RX lock */
2923                 ksnprintf(rxr->spin_name, sizeof(rxr->spin_name), "%s:rx(%d)",
2924                     device_get_nameunit(dev), txr->me);
2925
2926                 spin_init(&rxr->rx_spin);
2927
2928                 if (igb_dma_malloc(adapter, rsize,
2929                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2930                         device_printf(dev,
2931                             "Unable to allocate RxDescriptor memory\n");
2932                         error = ENOMEM;
2933                         goto err_rx_desc;
2934                 }
2935                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2936                 bzero((void *)rxr->rx_base, rsize);
2937
2938                 /* Allocate receive buffers for the ring*/
2939                 if (igb_allocate_receive_buffers(rxr)) {
2940                         device_printf(dev,
2941                             "Critical Failure setting up receive buffers\n");
2942                         error = ENOMEM;
2943                         goto err_rx_desc;
2944                 }
2945         }
2946
2947         /*
2948         ** Finally set up the queue holding structs
2949         */
2950         for (int i = 0; i < adapter->num_queues; i++) {
2951                 que = &adapter->queues[i];
2952                 que->adapter = adapter;
2953                 que->txr = &adapter->tx_rings[i];
2954                 que->rxr = &adapter->rx_rings[i];
2955         }
2956
2957         return (0);
2958
2959 err_rx_desc:
2960         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2961                 igb_dma_free(adapter, &rxr->rxdma);
2962 err_tx_desc:
2963         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2964                 igb_dma_free(adapter, &txr->txdma);
2965         kfree(adapter->rx_rings, M_DEVBUF);
2966 rx_fail:
2967 #if __FreeBSD_version >= 800000
2968         buf_ring_free(txr->br, M_DEVBUF);
2969 #endif
2970         kfree(adapter->tx_rings, M_DEVBUF);
2971 tx_fail:
2972         kfree(adapter->queues, M_DEVBUF);
2973 fail:
2974         return (error);
2975 }
2976
2977 /*********************************************************************
2978  *
2979  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2980  *  the information needed to transmit a packet on the wire. This is
2981  *  called only once at attach, setup is done every reset.
2982  *
2983  **********************************************************************/
2984 static int
2985 igb_allocate_transmit_buffers(struct tx_ring *txr)
2986 {
2987         struct adapter *adapter = txr->adapter;
2988         device_t dev = adapter->dev;
2989         struct igb_tx_buffer *txbuf;
2990         int error, i;
2991
2992         /*
2993          * Setup DMA descriptor areas.
2994          */
2995         if ((error = bus_dma_tag_create(NULL,
2996                                1, 0,                    /* alignment, bounds */
2997                                BUS_SPACE_MAXADDR,       /* lowaddr */
2998                                BUS_SPACE_MAXADDR,       /* highaddr */
2999                                NULL, NULL,              /* filter, filterarg */
3000                                IGB_TSO_SIZE,            /* maxsize */
3001                                IGB_MAX_SCATTER,         /* nsegments */
3002                                PAGE_SIZE,               /* maxsegsize */
3003                                0,                       /* flags */
3004                                &txr->txtag))) {
3005                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3006                 goto fail;
3007         }
3008
3009         if (!(txr->tx_buffers =
3010             (struct igb_tx_buffer *) kmalloc(sizeof(struct igb_tx_buffer) *
3011             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3012                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3013                 error = ENOMEM;
3014                 goto fail;
3015         }
3016
3017         /* Create the descriptor buffer dma maps */
3018         txbuf = txr->tx_buffers;
3019         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3020                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3021                 if (error != 0) {
3022                         device_printf(dev, "Unable to create TX DMA map\n");
3023                         goto fail;
3024                 }
3025         }
3026
3027         return 0;
3028 fail:
3029         /* We free all, it handles case where we are in the middle */
3030         igb_free_transmit_structures(adapter);
3031         return (error);
3032 }
3033
3034 /*********************************************************************
3035  *
3036  *  Initialize a transmit ring.
3037  *
3038  **********************************************************************/
3039 static void
3040 igb_setup_transmit_ring(struct tx_ring *txr)
3041 {
3042         struct adapter *adapter = txr->adapter;
3043         struct igb_tx_buffer *txbuf;
3044         int i;
3045
3046         /* Clear the old descriptor contents */
3047         IGB_TX_LOCK(txr);
3048         bzero((void *)txr->tx_base,
3049               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3050         /* Reset indices */
3051         txr->next_avail_desc = 0;
3052         txr->next_to_clean = 0;
3053
3054         /* Free any existing tx buffers. */
3055         txbuf = txr->tx_buffers;
3056         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3057                 if (txbuf->m_head != NULL) {
3058                         bus_dmamap_sync(txr->txtag, txbuf->map,
3059                             BUS_DMASYNC_POSTWRITE);
3060                         bus_dmamap_unload(txr->txtag, txbuf->map);
3061                         m_freem(txbuf->m_head);
3062                         txbuf->m_head = NULL;
3063                 }
3064                 /* clear the watch index */
3065                 txbuf->next_eop = -1;
3066         }
3067
3068         /* Set number of descriptors available */
3069         txr->tx_avail = adapter->num_tx_desc;
3070
3071         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3072             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3073         IGB_TX_UNLOCK(txr);
3074 }
3075
3076 /*********************************************************************
3077  *
3078  *  Initialize all transmit rings.
3079  *
3080  **********************************************************************/
3081 static void
3082 igb_setup_transmit_structures(struct adapter *adapter)
3083 {
3084         struct tx_ring *txr = adapter->tx_rings;
3085
3086         for (int i = 0; i < adapter->num_queues; i++, txr++)
3087                 igb_setup_transmit_ring(txr);
3088
3089         return;
3090 }
3091
3092 /*********************************************************************
3093  *
3094  *  Enable transmit unit.
3095  *
3096  **********************************************************************/
3097 static void
3098 igb_initialize_transmit_units(struct adapter *adapter)
3099 {
3100         struct tx_ring  *txr = adapter->tx_rings;
3101         struct e1000_hw *hw = &adapter->hw;
3102         u32             tctl, txdctl;
3103
3104         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3105
3106         /* Setup the Tx Descriptor Rings */
3107         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3108                 u64 bus_addr = txr->txdma.dma_paddr;
3109
3110                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3111                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3112                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3113                     (uint32_t)(bus_addr >> 32));
3114                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3115                     (uint32_t)bus_addr);
3116
3117                 /* Setup the HW Tx Head and Tail descriptor pointers */
3118                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3119                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3120
3121                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3122                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3123                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3124
3125                 txr->watchdog_check = FALSE;
3126
3127                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3128                 txdctl |= IGB_TX_PTHRESH;
3129                 txdctl |= IGB_TX_HTHRESH << 8;
3130                 txdctl |= IGB_TX_WTHRESH << 16;
3131                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3132                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3133         }
3134
3135         /* Program the Transmit Control Register */
3136         tctl = E1000_READ_REG(hw, E1000_TCTL);
3137         tctl &= ~E1000_TCTL_CT;
3138         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3139                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3140
3141         e1000_config_collision_dist(hw);
3142
3143         /* This write will effectively turn on the transmit unit. */
3144         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3145 }
3146
3147 /*********************************************************************
3148  *
3149  *  Free all transmit rings.
3150  *
3151  **********************************************************************/
3152 static void
3153 igb_free_transmit_structures(struct adapter *adapter)
3154 {
3155         struct tx_ring *txr = adapter->tx_rings;
3156
3157         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3158                 IGB_TX_LOCK(txr);
3159                 igb_free_transmit_buffers(txr);
3160                 igb_dma_free(adapter, &txr->txdma);
3161                 IGB_TX_UNLOCK(txr);
3162                 IGB_TX_LOCK_DESTROY(txr);
3163         }
3164         kfree(adapter->tx_rings, M_DEVBUF);
3165 }
3166
3167 /*********************************************************************
3168  *
3169  *  Free transmit ring related data structures.
3170  *
3171  **********************************************************************/
3172 static void
3173 igb_free_transmit_buffers(struct tx_ring *txr)
3174 {
3175         struct adapter *adapter = txr->adapter;
3176         struct igb_tx_buffer *tx_buffer;
3177         int             i;
3178
3179         INIT_DEBUGOUT("free_transmit_ring: begin");
3180
3181         if (txr->tx_buffers == NULL)
3182                 return;
3183
3184         tx_buffer = txr->tx_buffers;
3185         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3186                 if (tx_buffer->m_head != NULL) {
3187                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3188                             BUS_DMASYNC_POSTWRITE);
3189                         bus_dmamap_unload(txr->txtag,
3190                             tx_buffer->map);
3191                         m_freem(tx_buffer->m_head);
3192                         tx_buffer->m_head = NULL;
3193                         if (tx_buffer->map != NULL) {
3194                                 bus_dmamap_destroy(txr->txtag,
3195                                     tx_buffer->map);
3196                                 tx_buffer->map = NULL;
3197                         }
3198                 } else if (tx_buffer->map != NULL) {
3199                         bus_dmamap_unload(txr->txtag,
3200                             tx_buffer->map);
3201                         bus_dmamap_destroy(txr->txtag,
3202                             tx_buffer->map);
3203                         tx_buffer->map = NULL;
3204                 }
3205         }
3206 #if __FreeBSD_version >= 800000
3207         if (txr->br != NULL)
3208                 buf_ring_free(txr->br, M_DEVBUF);
3209 #endif
3210         if (txr->tx_buffers != NULL) {
3211                 kfree(txr->tx_buffers, M_DEVBUF);
3212                 txr->tx_buffers = NULL;
3213         }
3214         if (txr->txtag != NULL) {
3215                 bus_dma_tag_destroy(txr->txtag);
3216                 txr->txtag = NULL;
3217         }
3218         return;
3219 }
3220
3221 /**********************************************************************
3222  *
3223  *  Setup work for hardware segmentation offload (TSO)
3224  *
3225  **********************************************************************/
#ifdef NET_TSO 
/*
 * igb_tso_setup -- program one advanced context descriptor for TCP
 * segmentation offload (IPv4/TCP only).
 *
 * On success, consumes one TX descriptor, stores the total header
 * length (L2 + L3 + L4) in *hdrlen for the caller's data descriptors,
 * and returns TRUE.  Returns FALSE when the frame is not eligible:
 * headers not contiguous in the first mbuf, or payload is not TCP.
 */
static boolean_t
igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
{
	struct adapter *adapter = txr->adapter;
	struct e1000_adv_tx_context_desc *TXD;
	struct igb_tx_buffer        *tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0;
	u16 vtag = 0;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct tcphdr *th;


	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehdrlen = ETHER_HDR_LEN;

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
		return FALSE;

	/* Only supports IPV4 for now */
	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	/* Context descriptors overlay the regular TX descriptor slot. */
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;   /* 0 */
	/* Zero the IP checksum; hardware recomputes it per segment. */
	ip->ip_sum = 0;
	ip_hlen = ip->ip_hl << 2;
	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
	/*
	 * Seed the TCP checksum with the pseudo-header sum (addresses
	 * and protocol only, no length) as the TSO engine expects.
	 */
	th->th_sum = in_pseudo(ip->ip_src.s_addr,
	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	tcp_hlen = th->th_off << 2;
	/*
	 * Calculate header length, this is used
	 * in the transmit desc in igb_xmit
	 */
	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= ip_hlen;
	/*
	 * NOTE(review): the |= stores assume this descriptor slot was
	 * zeroed beforehand -- confirm the ring is cleared at setup.
	 */
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
	/* 82575 needs the queue index added */
	if (adapter->hw.mac.type == e1000_82575)
		mss_l4len_idx |= txr->me << 4;
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);
	/* Context descriptors carry no mbuf and no end-of-packet index. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* Advance, wrapping at the end of the ring. */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	return TRUE;
}
#endif
3312
3313 /*********************************************************************
3314  *
3315  *  Context Descriptor setup for VLAN or CSUM
3316  *
3317  **********************************************************************/
3318
/*
 * igb_tx_ctx_setup -- program an advanced context descriptor carrying
 * the VLAN tag and/or checksum-offload parameters for one frame.
 *
 * Returns TRUE when a supported checksum offload was configured,
 * FALSE otherwise.  Note that when the frame carries a VLAN tag a
 * context descriptor is consumed even if FALSE is returned, since in
 * advanced-descriptor mode the tag can only travel in the context
 * descriptor.
 */
static bool
igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter *adapter = txr->adapter;
	struct e1000_adv_tx_context_desc *TXD;
	struct igb_tx_buffer        *tx_buffer;
	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
	struct ether_vlan_header *eh;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6;
	int  ehdrlen, ctxd, ip_hlen = 0;
	u16	etype, vtag = 0;
	u8	ipproto = 0;
	bool	offload = TRUE;

	/* No checksum-offload bits requested for this frame? */
	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must 
	** be placed into the context descriptor, thus
	** we need to be here just for that setup.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	} else if (offload == FALSE)
		/* Neither VLAN nor checksum work: no descriptor needed. */
		return FALSE;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;

	switch (etype) {
		case ETHERTYPE_IP:
			/*
			 * NOTE(review): ip->ip_hl is read before the m_len
			 * bounds check below -- assumes at least the fixed
			 * IP header is contiguous in the first mbuf; verify
			 * the callers guarantee this.
			 */
			ip = (struct ip *)(mp->m_data + ehdrlen);
			ip_hlen = ip->ip_hl << 2;
			if (mp->m_len < ehdrlen + ip_hlen) {
				offload = FALSE;
				break;
			}
			ipproto = ip->ip_p;
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
			break;
		case ETHERTYPE_IPV6:
			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
			ip_hlen = sizeof(struct ip6_hdr);
			if (mp->m_len < ehdrlen + ip_hlen)
				return (FALSE);
			ipproto = ip6->ip6_nxt;
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
			break;
		default:
			offload = FALSE;
			break;
	}

	vlan_macip_lens |= ip_hlen;
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	/* Select the L4 checksum type matching the requested offload. */
	switch (ipproto) {
		case IPPROTO_TCP:
			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
			break;
#if __FreeBSD_version >= 800000
		case IPPROTO_SCTP:
			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
			break;
#endif
		default:
			offload = FALSE;
			break;
	}

	/* 82575 needs the queue index added */
	if (adapter->hw.mac.type == e1000_82575)
		mss_l4len_idx = txr->me << 4;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	/* Context slots carry no mbuf and no end-of-packet index. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (offload);
}
3438
3439
3440 /**********************************************************************
3441  *
3442  *  Examine each tx_buffer in the used queue. If the hardware is done
3443  *  processing the packet then free associated resources. The
3444  *  tx_buffer is put back on the free queue.
3445  *
3446  *  TRUE return means there's work in the ring to clean, FALSE its empty.
3447  **********************************************************************/
/*
 * igb_txeof -- reclaim completed transmit descriptors.
 *
 * Starting at next_to_clean, walks the ring one packet at a time and,
 * for each packet whose end-of-packet (EOP) descriptor has the DD
 * (descriptor done) status bit set, frees the mbuf, unloads its DMA
 * map and returns the descriptors to the free pool.  Clears
 * IFF_OACTIVE once enough room exists, and disables the watchdog when
 * the ring is fully drained.
 *
 * Returns TRUE while work remains in the ring, FALSE when it is empty.
 * Must be called with the TX lock held.
 */
static bool
igb_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	int first, last, done;
	struct igb_tx_buffer *tx_buffer;
	struct e1000_tx_desc   *tx_desc, *eop_desc;
	struct ifnet   *ifp = adapter->ifp;

	IGB_TX_LOCK_ASSERT(txr);

	/* Ring completely free: nothing to reclaim. */
	if (txr->tx_avail == adapter->num_tx_desc)
		return FALSE;

	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the 
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
		last = 0;
	done = last;

	/* Pick up the hardware's status writes before reading DD bits. */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;

			/* Only the first descriptor of a packet holds
			 * the mbuf; context/continuation slots do not. */
			if (tx_buffer->m_head) {
				txr->bytes +=
				    tx_buffer->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);

				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;
			/* Progress seen: push the watchdog deadline out. */
			txr->watchdog_time = ticks;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		++txr->packets;
		++ifp->if_opackets;
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
	}
	/* Hand the cleared descriptors back to the hardware. */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE
	 * to tell the stack that it is OK to send packets.
	 */
	if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
		ifp->if_flags &= ~IFF_OACTIVE;
		/* All clean, turn off the watchdog */
		if (txr->tx_avail == adapter->num_tx_desc) {
			txr->watchdog_check = FALSE;
			return FALSE;
		}
	}

	return (TRUE);
}
3542
3543
3544 /*********************************************************************
3545  *
3546  *  Setup descriptor buffer(s) from system mbuf buffer pools.
3547  *              i - designates the ring index
3548  *              clean - tells the function whether to update
3549  *                      the header, the packet buffer, or both.
3550  *
3551  **********************************************************************/
/*
 * igb_get_buf -- attach fresh mbuf(s) to RX descriptor 'i'.
 *
 * 'clean' selects which halves to refresh: IGB_CLEAN_HEADER replaces
 * the header mbuf, IGB_CLEAN_PAYLOAD the payload mbuf (header-split
 * operation refreshes both).  The new mbufs are DMA-loaded into the
 * ring's spare maps first, so on any failure the currently loaded
 * buffers stay intact; only after all loads succeed are the old maps
 * unloaded and the spare maps swapped in, and the descriptor's
 * hdr/pkt addresses rewritten.  Returns 0 on success or an errno.
 */
static int
igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
{
	struct adapter		*adapter = rxr->adapter;
	struct igb_rx_buf	*rxbuf;
	struct mbuf		*mh, *mp;
	bus_dma_segment_t	hseg[1];
	bus_dma_segment_t	pseg[1];
	bus_dmamap_t		map;
	int			nsegs, error;


	rxbuf = &rxr->rx_buffers[i];
	mh = mp = NULL;
	if ((clean & IGB_CLEAN_HEADER) != 0) {
		mh = m_gethdr(MB_DONTWAIT, MT_DATA);
		if (mh == NULL) {
			adapter->mbuf_header_failed++;		
			return (ENOBUFS);
		}
		mh->m_pkthdr.len = mh->m_len = MHLEN;
		/*
		 * Because IGB_HDR_BUF size is less than MHLEN
		 * and we configure controller to split headers
		 * we can align mbuf on ETHER_ALIGN boundary.
		 */
		m_adj(mh, ETHER_ALIGN);
		/* Load into the spare map; the live map stays valid. */
		error = bus_dmamap_load_mbuf_segment(rxr->rx_htag,
		    rxr->rx_hspare_map, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			m_freem(mh);
			return (error);
		}
		/* Header mbuf is a bare buffer, not a packet header. */
		mh->m_flags &= ~M_PKTHDR;
	}
	if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
		mp = m_getl(adapter->rx_mbuf_sz,
		    MB_DONTWAIT, MT_DATA, M_PKTHDR, NULL);
#if 0
		mp = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR,
		    adapter->rx_mbuf_sz);
#endif
		if (mp == NULL) {
			/* Undo the header load performed above. */
			if (mh != NULL) {
				adapter->mbuf_packet_failed++;		
				bus_dmamap_unload(rxr->rx_htag,
				    rxbuf->head_map);
				mh->m_flags |= M_PKTHDR;
				m_freem(mh);
			}
			return (ENOBUFS);
		}
		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
		error = bus_dmamap_load_mbuf_segment(rxr->rx_ptag,
		    rxr->rx_pspare_map, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
		if (error != 0) {
			if (mh != NULL) {
				bus_dmamap_unload(rxr->rx_htag,
				    rxbuf->head_map);
				mh->m_flags |= M_PKTHDR;
				m_freem(mh);
			}
			m_freem(mp);
			return (error);
		}
		mp->m_flags &= ~M_PKTHDR;
	}

	/* Loading new DMA maps complete, unload maps for received buffers. */
	if ((clean & IGB_CLEAN_HEADER) != 0 && rxbuf->m_head != NULL) {
		bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
	}
	if ((clean & IGB_CLEAN_PAYLOAD) != 0 && rxbuf->m_pack != NULL) {
		bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
		    BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
	}

	/* Reflect loaded dmamaps: swap spare/live maps and rewrite the
	 * descriptor's DMA addresses. */
	if ((clean & IGB_CLEAN_HEADER) != 0) {
		map = rxbuf->head_map;
		rxbuf->head_map = rxr->rx_hspare_map;
		rxr->rx_hspare_map = map;
		rxbuf->m_head = mh;
		bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
		    BUS_DMASYNC_PREREAD);
		rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
	}
	if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
		map = rxbuf->pack_map;
		rxbuf->pack_map = rxr->rx_pspare_map;
		rxr->rx_pspare_map = map;
		rxbuf->m_pack = mp;
		bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
		    BUS_DMASYNC_PREREAD);
		rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
	}

	return (0);
}
3654
3655 /*********************************************************************
3656  *
3657  *  Allocate memory for rx_buffer structures. Since we use one
3658  *  rx_buffer per received packet, the maximum number of rx_buffer's
3659  *  that we'll need is equal to the number of receive descriptors
3660  *  that we've allocated.
3661  *
3662  **********************************************************************/
/*
 * igb_allocate_receive_buffers -- allocate the per-ring RX software
 * state: the igb_rx_buf array, the header (MSIZE) and payload
 * (MJUMPAGESIZE) DMA tags, the two spare maps used by igb_get_buf(),
 * and a head/packet DMA map pair for every descriptor.
 *
 * On any failure all partially allocated receive structures for the
 * whole adapter are torn down.  Returns 0 on success or an errno.
 */
static int
igb_allocate_receive_buffers(struct rx_ring *rxr)
{
	struct	adapter 	*adapter = rxr->adapter;
	device_t 		dev = adapter->dev;
	struct igb_rx_buf	*rxbuf;
	int             	i, bsize, error;

	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
	if (!(rxr->rx_buffers =
	    (struct igb_rx_buf *) kmalloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Tag for the small header buffers (one mbuf, MSIZE max). */
	if ((error = bus_dma_tag_create(NULL,
				   1, 0,		/* alignment, bounds */
				   BUS_SPACE_MAXADDR,	/* lowaddr */
				   BUS_SPACE_MAXADDR,	/* highaddr */
				   NULL, NULL,		/* filter, filterarg */
				   MSIZE,		/* maxsize */
				   1,			/* nsegments */
				   MSIZE,		/* maxsegsize */
				   0,			/* flags */
				   &rxr->rx_htag))) {
		device_printf(dev, "Unable to create RX DMA tag\n");
		goto fail;
	}

	/* Tag for the payload buffers (single jumbo-page segment). */
	if ((error = bus_dma_tag_create(NULL,
				   1, 0,		/* alignment, bounds */
				   BUS_SPACE_MAXADDR,	/* lowaddr */
				   BUS_SPACE_MAXADDR,	/* highaddr */
				   NULL, NULL,		/* filter, filterarg */
				   MJUMPAGESIZE,	/* maxsize */
				   1,			/* nsegments */
				   MJUMPAGESIZE,	/* maxsegsize */
				   0,			/* flags */
				   &rxr->rx_ptag))) {
		device_printf(dev, "Unable to create RX payload DMA tag\n");
		goto fail;
	}

	/* Create the spare maps (used by getbuf) */
	error = bus_dmamap_create(rxr->rx_htag, BUS_DMA_NOWAIT,
	     &rxr->rx_hspare_map);
	if (error) {
		device_printf(dev,
		    "%s: bus_dmamap_create header spare failed: %d\n",
		    __func__, error);
		goto fail;
	}
	error = bus_dmamap_create(rxr->rx_ptag, BUS_DMA_NOWAIT,
	     &rxr->rx_pspare_map);
	if (error) {
		device_printf(dev,
		    "%s: bus_dmamap_create packet spare failed: %d\n",
		    __func__, error);
		goto fail;
	}

	/* One head/packet map pair per descriptor slot. */
	for (i = 0; i < adapter->num_rx_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		error = bus_dmamap_create(rxr->rx_htag,
		    BUS_DMA_NOWAIT, &rxbuf->head_map);
		if (error) {
			device_printf(dev,
			    "Unable to create RX head DMA maps\n");
			goto fail;
		}
		error = bus_dmamap_create(rxr->rx_ptag,
		    BUS_DMA_NOWAIT, &rxbuf->pack_map);
		if (error) {
			device_printf(dev,
			    "Unable to create RX packet DMA maps\n");
			goto fail;
		}
	}

	return (0);

fail:
	/* Frees all, but can handle partial completion */
	igb_free_receive_structures(adapter);
	return (error);
}
3751
3752
3753 static void
3754 igb_free_receive_ring(struct rx_ring *rxr)
3755 {
3756         struct  adapter         *adapter;
3757         struct igb_rx_buf       *rxbuf;
3758         int i;
3759
3760         adapter = rxr->adapter;
3761         for (i = 0; i < adapter->num_rx_desc; i++) {
3762                 rxbuf = &rxr->rx_buffers[i];
3763                 if (rxbuf->m_head != NULL) {
3764                         bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3765                             BUS_DMASYNC_POSTREAD);
3766                         bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3767                         rxbuf->m_head->m_flags |= M_PKTHDR;
3768                         m_freem(rxbuf->m_head);
3769                 }
3770                 if (rxbuf->m_pack != NULL) {
3771                         bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3772                             BUS_DMASYNC_POSTREAD);
3773                         bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3774                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3775                         m_freem(rxbuf->m_pack);
3776                 }
3777                 rxbuf->m_head = NULL;
3778                 rxbuf->m_pack = NULL;
3779         }
3780 }
3781
3782
3783 /*********************************************************************
3784  *
3785  *  Initialize a receive ring and its buffers.
3786  *
3787  **********************************************************************/
3788 static int
3789 igb_setup_receive_ring(struct rx_ring *rxr)
3790 {
3791         struct  adapter         *adapter;
3792         struct  ifnet           *ifp;
3793         device_t                dev;
3794 #ifdef NET_LRO 
3795         struct lro_ctrl         *lro = &rxr->lro;
3796 #endif
3797         int                     j, rsize, error = 0;
3798
3799         adapter = rxr->adapter;
3800         dev = adapter->dev;
3801         ifp = adapter->ifp;
3802
3803         /* Clear the ring contents */
3804         IGB_RX_LOCK(rxr);
3805         rsize = roundup2(adapter->num_rx_desc *
3806             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3807         bzero((void *)rxr->rx_base, rsize);
3808
3809         /*
3810         ** Free current RX buffer structures and their mbufs
3811         */
3812         igb_free_receive_ring(rxr);
3813
3814         /* Now replenish the ring mbufs */
3815         for (j = 0; j < adapter->num_rx_desc; j++) {
3816                 if ((error = igb_get_buf(rxr, j, IGB_CLEAN_BOTH)) != 0)
3817                         goto fail;
3818         }
3819
3820         /* Setup our descriptor indices */
3821         rxr->next_to_check = 0;
3822         rxr->last_cleaned = 0;
3823         rxr->lro_enabled = FALSE;
3824
3825         if (igb_header_split)
3826                 rxr->hdr_split = TRUE;
3827 #if NET_LRO 
3828         else
3829                 ifp->if_capabilities &= ~IFCAP_LRO;
3830 #endif
3831
3832         rxr->fmp = NULL;
3833         rxr->lmp = NULL;
3834         rxr->discard = FALSE;
3835
3836         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3837             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3838
3839         /*
3840         ** Now set up the LRO interface, we
3841         ** also only do head split when LRO
3842         ** is enabled, since so often they
3843         ** are undesireable in similar setups.
3844         */
3845 #if NET_LRO 
3846         if (ifp->if_capenable & IFCAP_LRO) {
3847                 int err = tcp_lro_init(lro);
3848                 if (err) {
3849                         device_printf(dev, "LRO Initialization failed!\n");
3850                         goto fail;
3851                 }
3852                 INIT_DEBUGOUT("RX LRO Initialized\n");
3853                 rxr->lro_enabled = TRUE;
3854                 lro->ifp = adapter->ifp;
3855         }
3856 #endif
3857
3858         IGB_RX_UNLOCK(rxr);
3859         return (0);
3860
3861 fail:
3862         igb_free_receive_ring(rxr);
3863         IGB_RX_UNLOCK(rxr);
3864         return (error);
3865 }
3866
3867 /*********************************************************************
3868  *
3869  *  Initialize all receive rings.
3870  *
3871  **********************************************************************/
3872 static int
3873 igb_setup_receive_structures(struct adapter *adapter)
3874 {
3875         struct rx_ring *rxr = adapter->rx_rings;
3876         int i, j;
3877
3878         for (i = 0; i < adapter->num_queues; i++, rxr++)
3879                 if (igb_setup_receive_ring(rxr))
3880                         goto fail;
3881
3882         return (0);
3883 fail:
3884         /*
3885          * Free RX buffers allocated so far, we will only handle
3886          * the rings that completed, the failing case will have
3887          * cleaned up for itself. The value of 'i' will be the
3888          * failed ring so we must pre-decrement it.
3889          */
3890         rxr = adapter->rx_rings;
3891         for (--i; i > 0; i--, rxr++) {
3892                 for (j = 0; j < adapter->num_rx_desc; j++)
3893                         igb_free_receive_ring(rxr);
3894         }
3895
3896         return (ENOBUFS);
3897 }
3898
3899 /*********************************************************************
3900  *
3901  *  Enable receive unit.
3902  *
3903  **********************************************************************/
3904 static void
3905 igb_initialize_receive_units(struct adapter *adapter)
3906 {
3907         struct rx_ring  *rxr = adapter->rx_rings;
3908         struct ifnet    *ifp = adapter->ifp;
3909         struct e1000_hw *hw = &adapter->hw;
3910         u32             rctl, rxcsum, psize, srrctl = 0;
3911
3912         INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3913
3914         /*
3915          * Make sure receives are disabled while setting
3916          * up the descriptor ring
3917          */
3918         rctl = E1000_READ_REG(hw, E1000_RCTL);
3919         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3920
3921         /*
3922         ** Set up for header split
3923         */
3924         if (rxr->hdr_split) {
3925                 /* Use a standard mbuf for the header */
3926                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3927                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3928         } else
3929                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3930
3931         /*
3932         ** Set up for jumbo frames
3933         */
3934         if (ifp->if_mtu > ETHERMTU) {
3935                 rctl |= E1000_RCTL_LPE;
3936                 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3937                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3938
3939                 /* Set maximum packet len */
3940                 psize = adapter->max_frame_size;
3941                 /* are we on a vlan? */
3942                 if (adapter->ifp->if_vlantrunks != NULL)
3943                         psize += VLAN_TAG_SIZE;
3944                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3945         } else {
3946                 rctl &= ~E1000_RCTL_LPE;
3947                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3948                 rctl |= E1000_RCTL_SZ_2048;
3949         }
3950
3951         /* Setup the Base and Length of the Rx Descriptor Rings */
3952         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3953                 u64 bus_addr = rxr->rxdma.dma_paddr;
3954                 u32 rxdctl;
3955
3956                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3957                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3958                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3959                     (uint32_t)(bus_addr >> 32));
3960                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3961                     (uint32_t)bus_addr);
3962                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3963                 /* Enable this Queue */
3964                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3965                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3966                 rxdctl &= 0xFFF00000;
3967                 rxdctl |= IGB_RX_PTHRESH;
3968                 rxdctl |= IGB_RX_HTHRESH << 8;
3969                 rxdctl |= IGB_RX_WTHRESH << 16;
3970                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3971         }
3972
3973         /*
3974         ** Setup for RX MultiQueue
3975         */
3976         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3977         if (adapter->num_queues >1) {
3978                 u32 random[10], mrqc, shift = 0;
3979                 union igb_reta {
3980                         u32 dword;
3981                         u8  bytes[4];
3982                 } reta;
3983
3984                 karc4rand(&random, sizeof(random));
3985                 if (adapter->hw.mac.type == e1000_82575)
3986                         shift = 6;
3987                 /* Warning FM follows */
3988                 for (int i = 0; i < 128; i++) {
3989                         reta.bytes[i & 3] =
3990                             (i % adapter->num_queues) << shift;
3991                         if ((i & 3) == 3)
3992                                 E1000_WRITE_REG(hw,
3993                                     E1000_RETA(i >> 2), reta.dword);
3994                 }
3995                 /* Now fill in hash table */
3996                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3997                 for (int i = 0; i < 10; i++)
3998                         E1000_WRITE_REG_ARRAY(hw,
3999                             E1000_RSSRK(0), i, random[i]);
4000
4001                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4002                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4003                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4004                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4005                 mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4006                     E1000_MRQC_RSS_FIELD_IPV6_UDP);