/******************************************************************************

  Copyright (c) 2001-2010, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>

#ifdef IGB_IEEE1588
#include <sys/ieee1588.h>
#endif

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/ifq_var.h>

#include <net/if_types.h>
#include <net/vlan/if_vlan_var.h>
#include <net/vlan/if_vlan_ether.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#ifdef NET_LRO
#include <netinet/tcp_lro.h>
#endif
#include <netinet/udp.h>

#include <sys/in_cksum.h>
#include <bus/pci/pcivar.h>
#include <bus/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int     igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.9.1";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices this driver supports.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
        { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
        { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
                                                PCI_ANY_ID, PCI_ANY_ID, 0},
        /* required last entry */
        { 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
        "Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int      igb_probe(device_t);
static int      igb_attach(device_t);
static int      igb_detach(device_t);
static int      igb_shutdown(device_t);
static int      igb_suspend(device_t);
static int      igb_resume(device_t);
static void     igb_start(struct ifnet *);
static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int      igb_mq_start(struct ifnet *, struct mbuf *);
static int      igb_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     igb_qflush(struct ifnet *);
#endif
static int      igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void     igb_init(void *);
static void     igb_init_locked(struct adapter *);
static void     igb_stop(void *);
static void     igb_media_status(struct ifnet *, struct ifmediareq *);
static int      igb_media_change(struct ifnet *);
static void     igb_identify_hardware(struct adapter *);
static int      igb_allocate_pci_resources(struct adapter *);
static int      igb_allocate_msix(struct adapter *);
static int      igb_allocate_legacy(struct adapter *);
static int      igb_setup_msix(struct adapter *);
static void     igb_free_pci_resources(struct adapter *);
static void     igb_local_timer(void *);
static void     igb_reset(struct adapter *);
static void     igb_setup_interface(device_t, struct adapter *);
static int      igb_allocate_queues(struct adapter *);
static void     igb_configure_queues(struct adapter *);

static int      igb_allocate_transmit_buffers(struct tx_ring *);
static void     igb_setup_transmit_structures(struct adapter *);
static void     igb_setup_transmit_ring(struct tx_ring *);
static void     igb_initialize_transmit_units(struct adapter *);
static void     igb_free_transmit_structures(struct adapter *);
static void     igb_free_transmit_buffers(struct tx_ring *);

static int      igb_allocate_receive_buffers(struct rx_ring *);
static int      igb_setup_receive_structures(struct adapter *);
static int      igb_setup_receive_ring(struct rx_ring *);
static void     igb_initialize_receive_units(struct adapter *);
static void     igb_free_receive_structures(struct adapter *);
static void     igb_free_receive_buffers(struct rx_ring *);
static void     igb_free_receive_ring(struct rx_ring *);

static void     igb_enable_intr(struct adapter *);
static void     igb_disable_intr(struct adapter *);
static void     igb_update_stats_counters(struct adapter *);
static bool     igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *,
                    union e1000_adv_rx_desc *, int);
static __inline void igb_rx_input(struct rx_ring *,
                    struct ifnet *, struct mbuf *, u32);

static bool     igb_rxeof(struct rx_ring *, int);
static void     igb_rx_checksum(u32, struct mbuf *, u32);
static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
#ifdef NET_TSO
static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
#endif
static void     igb_set_promisc(struct adapter *);
static void     igb_disable_promisc(struct adapter *);
static void     igb_set_multi(struct adapter *);
static void     igb_print_hw_stats(struct adapter *);
static void     igb_update_link_status(struct adapter *);
static int      igb_get_buf(struct rx_ring *, int, u8);

static void     igb_register_vlan(void *, struct ifnet *, u16);
static void     igb_unregister_vlan(void *, struct ifnet *, u16);
static void     igb_setup_vlan_hw_support(struct adapter *);

static int      igb_xmit(struct tx_ring *, struct mbuf **);
static int      igb_dma_malloc(struct adapter *, bus_size_t,
                    struct igb_dma_alloc *, int);
static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void     igb_print_debug_info(struct adapter *);
static void     igb_print_nvm_info(struct adapter *);
static int      igb_is_valid_ether_addr(u8 *);
static int      igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int      igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void     igb_init_manageability(struct adapter *);
static void     igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);

static void     igb_irq_fast(void *);
static void     igb_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);
static void     igb_handle_rxtx(void *context, int pending);
static void     igb_handle_que(void *context, int pending);
static void     igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void     igb_msix_que(void *);
static void     igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
        /* Device interface */
        DEVMETHOD(device_probe, igb_probe),
        DEVMETHOD(device_attach, igb_attach),
        DEVMETHOD(device_detach, igb_detach),
        DEVMETHOD(device_shutdown, igb_shutdown),
        DEVMETHOD(device_suspend, igb_suspend),
        DEVMETHOD(device_resume, igb_resume),
        {0, 0}
};

static driver_t igb_driver = {
        "igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate is
** varied over time based on the traffic
** for that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
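
/*
 * Usage note (illustrative): besides the boot-time tunable above, attach
 * also exposes this knob per device, so it can be toggled at runtime with
 * something like "sysctl hw.igb0.enable_aim=0" (the exact node name comes
 * from device_get_nameunit()).
 */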

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);

/*
 * Header split has proven beneficial in most
 * circumstances tested, but there have been
 * some stability issues, so the default is
 * off.
 */
static int igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
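
/*
 * Note (assumption based on the shared-code enum e1000_fc_mode): the
 * values accepted here are 0 = none, 1 = RX pause only, 2 = TX pause
 * only, 3 = full flow control.
 */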

/*
** Shadow VFTA table: this is needed because
** the real filter table gets cleared during
** a soft reset, and the driver needs to be
** able to repopulate it.
*/
static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
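
/*
 * Sketch (illustrative, assuming the usual VFTA register array): after a
 * reset, the table can be repopulated from the shadow copy roughly as:
 *
 *      for (i = 0; i < IGB_VFTA_SIZE; i++)
 *              if (igb_shadow_vfta[i] != 0)
 *                      E1000_WRITE_REG_ARRAY(&adapter->hw, E1000_VFTA,
 *                          i, igb_shadow_vfta[i]);
 *
 * which is roughly what igb_setup_vlan_hw_support() does during init.
 */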

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
        char            adapter_name[60];
        uint16_t        pci_vendor_id = 0;
        uint16_t        pci_device_id = 0;
        uint16_t        pci_subvendor_id = 0;
        uint16_t        pci_subdevice_id = 0;
        igb_vendor_info_t *ent;

        INIT_DEBUGOUT("igb_probe: begin");

        pci_vendor_id = pci_get_vendor(dev);
        if (pci_vendor_id != IGB_VENDOR_ID)
                return (ENXIO);

        pci_device_id = pci_get_device(dev);
        pci_subvendor_id = pci_get_subvendor(dev);
        pci_subdevice_id = pci_get_subdevice(dev);

        ent = igb_vendor_info_array;
        while (ent->vendor_id != 0) {
                if ((pci_vendor_id == ent->vendor_id) &&
                    (pci_device_id == ent->device_id) &&
                    ((pci_subvendor_id == ent->subvendor_id) ||
                    (ent->subvendor_id == PCI_ANY_ID)) &&
                    ((pci_subdevice_id == ent->subdevice_id) ||
                    (ent->subdevice_id == PCI_ANY_ID))) {
                        ksprintf(adapter_name, "%s %s",
                                igb_strings[ent->index],
                                igb_driver_version);
                        device_set_desc_copy(dev, adapter_name);
                        return (BUS_PROBE_DEFAULT);
                }
                ent++;
        }

        return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
        struct adapter  *adapter;
        int             error = 0;
        u16             eeprom_data;

        INIT_DEBUGOUT("igb_attach: begin");

        adapter = device_get_softc(dev);
        adapter->dev = adapter->osdep.dev = dev;
        IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

        /* SYSCTL stuff */
        sysctl_ctx_init(&adapter->sysctl_ctx);
        adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
                                        SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
                                        device_get_nameunit(adapter->dev),
                                        CTLFLAG_RD, 0, "");
        if (adapter->sysctl_tree == NULL) {
                device_printf(adapter->dev, "can't add sysctl node\n");
                error = ENOMEM;
                goto err_sysctl;
        }

        SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
            SYSCTL_CHILDREN(adapter->sysctl_tree),
            OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_debug_info, "I", "Debug Information");

        SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
            SYSCTL_CHILDREN(adapter->sysctl_tree),
            OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
            igb_sysctl_stats, "I", "Statistics");

        SYSCTL_ADD_INT(&adapter->sysctl_ctx,
            SYSCTL_CHILDREN(adapter->sysctl_tree),
            OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
            &igb_fc_setting, 0, "Flow Control");

        SYSCTL_ADD_INT(&adapter->sysctl_ctx,
            SYSCTL_CHILDREN(adapter->sysctl_tree),
            OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
            &igb_enable_aim, 1, "Interrupt Moderation");

        callout_init(&adapter->timer);

        /* Determine hardware and mac info */
        igb_identify_hardware(adapter);

        /* Setup PCI resources */
        if (igb_allocate_pci_resources(adapter)) {
                device_printf(dev, "Allocation of PCI resources failed\n");
                error = ENXIO;
                goto err_pci;
        }

        /* Do Shared Code initialization */
        if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
                device_printf(dev, "Setup of Shared code failed\n");
                error = ENXIO;
                goto err_pci;
        }

        e1000_get_bus_info(&adapter->hw);

        /* Sysctls for limiting the amount of work done in the taskqueue */
        igb_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process", &adapter->rx_process_limit,
            igb_rx_process_limit);

        /*
         * Validate the number of transmit and receive descriptors.  They
         * must not exceed the hardware maximum, and the ring size must be
         * a multiple of IGB_DBA_ALIGN.
         */
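        /*
         * Worked example (illustrative, assuming the 16-byte legacy
         * descriptor and IGB_DBA_ALIGN == 128): 1024 TX descriptors
         * occupy 16384 bytes, a multiple of 128, so the value is
         * accepted; 1023 descriptors would occupy 16368 bytes, not a
         * multiple of 128, so the default would be used instead.
         */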
        if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
                device_printf(dev, "Using %d TX descriptors instead of %d!\n",
                    IGB_DEFAULT_TXD, igb_txd);
                adapter->num_tx_desc = IGB_DEFAULT_TXD;
        } else
                adapter->num_tx_desc = igb_txd;
        if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
            (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
                device_printf(dev, "Using %d RX descriptors instead of %d!\n",
                    IGB_DEFAULT_RXD, igb_rxd);
                adapter->num_rx_desc = IGB_DEFAULT_RXD;
        } else
                adapter->num_rx_desc = igb_rxd;

        adapter->hw.mac.autoneg = DO_AUTO_NEG;
        adapter->hw.phy.autoneg_wait_to_complete = FALSE;
        adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

        /* Copper options */
        if (adapter->hw.phy.media_type == e1000_media_type_copper) {
                adapter->hw.phy.mdix = AUTO_ALL_MODES;
                adapter->hw.phy.disable_polarity_correction = FALSE;
                adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
        }

        /*
         * Set the frame limits assuming
         * standard ethernet sized frames.
         */
        adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
        adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

        /*
        ** Allocate and Setup Queues
        */
        if (igb_allocate_queues(adapter)) {
                error = ENOMEM;
                goto err_pci;
        }

        /*
        ** Start from a known state: this is
        ** important for reading the nvm and
        ** mac address.
        */
        e1000_reset_hw(&adapter->hw);

        /* Make sure we have a good EEPROM before we read from it */
        if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                /*
                ** Some PCI-E parts fail the first check due to
                ** the link being in sleep state; call it again.
                ** If it fails a second time, it's a real issue.
                */
                if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
                        device_printf(dev,
                            "The EEPROM Checksum Is Not Valid\n");
                        error = EIO;
                        goto err_late;
                }
        }

        /*
        ** Copy the permanent MAC address out of the EEPROM
        */
        if (e1000_read_mac_addr(&adapter->hw) < 0) {
                device_printf(dev, "EEPROM read error while reading MAC"
                    " address\n");
                error = EIO;
                goto err_late;
        }
        /* Check its sanity */
        if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
                device_printf(dev, "Invalid MAC address\n");
                error = EIO;
                goto err_late;
        }

        /*
        ** Configure Interrupts
        */
        if ((adapter->msix > 1) && (igb_enable_msix))
                error = igb_allocate_msix(adapter);
        else /* MSI or Legacy */
                error = igb_allocate_legacy(adapter);
        if (error)
                goto err_late;

        /* Setup OS specific network interface */
        igb_setup_interface(dev, adapter);

        /* Now get a good starting state */
        igb_reset(adapter);

        /* Initialize statistics */
        igb_update_stats_counters(adapter);

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);

        /* Indicate SOL/IDER usage */
        if (e1000_check_reset_block(&adapter->hw))
                device_printf(dev,
                    "PHY reset is blocked due to SOL/IDER session.\n");

        /* Determine if we have to control management hardware */
        adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

        /*
         * Setup Wake-on-Lan
         */
        /* APME bit in EEPROM is mapped to WUC.APME */
        eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
        if (eeprom_data)
                adapter->wol = E1000_WUFC_MAG;

        /* Register for VLAN events */
        adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
             igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
        adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
             igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

        /* Tell the stack that the interface is not active */
        adapter->ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);

        INIT_DEBUGOUT("igb_attach: end");

        return (0);

err_late:
        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);
        igb_release_hw_control(adapter);
err_pci:
        igb_free_pci_resources(adapter);
err_sysctl:
        sysctl_ctx_free(&adapter->sysctl_ctx);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
        struct adapter  *adapter = device_get_softc(dev);

        INIT_DEBUGOUT("igb_detach: begin");

        /* Make sure VLANS are not using driver */
        if (adapter->ifp->if_vlantrunks != NULL) {
                device_printf(dev, "Vlan in use, detach first\n");
                return (EBUSY);
        }

#ifdef DEVICE_POLLING
        if (adapter->ifp->if_capenable & IFCAP_POLLING)
                ether_poll_deregister(adapter->ifp);
#endif

        IGB_CORE_LOCK(adapter);
        adapter->in_detach = 1;
        igb_stop(adapter);
        IGB_CORE_UNLOCK(adapter);

        e1000_phy_hw_reset(&adapter->hw);

        /* Give control back to firmware */
        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        /* Unregister VLAN events */
        if (adapter->vlan_attach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
        if (adapter->vlan_detach != NULL)
                EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

        ether_ifdetach(adapter->ifp);

        //callout_drain(&adapter->timer);
        callout_stop(&adapter->timer);

        igb_free_pci_resources(adapter);
        bus_generic_detach(dev);

        igb_free_transmit_structures(adapter);
        igb_free_receive_structures(adapter);

        sysctl_ctx_free(&adapter->sysctl_ctx);
        IGB_CORE_LOCK_DESTROY(adapter);

        return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
        return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);

        IGB_CORE_LOCK(adapter);

        igb_stop(adapter);

        igb_release_manageability(adapter);
        igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
        struct adapter *adapter = device_get_softc(dev);
        struct ifnet *ifp = adapter->ifp;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        igb_init_manageability(adapter);

        if ((ifp->if_flags & IFF_UP) &&
            (ifp->if_flags & IFF_RUNNING))
                igb_start(ifp);

        IGB_CORE_UNLOCK(adapter);

        return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct mbuf     *m_head;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) !=
            IFF_RUNNING)
                return;
        if (!adapter->link_active)
                return;

        while (!ifq_is_empty(&ifp->if_snd)) {

                m_head = ifq_dequeue(&ifp->if_snd, NULL);
                if (m_head == NULL)
                        break;
                /*
                 *  Encapsulation can modify our pointer, and/or make it
                 *  NULL on failure.  In that event, we can't requeue.
                 */
                if (igb_xmit(txr, &m_head)) {
                        if (m_head == NULL)
                                break;
                        ifp->if_flags |= IFF_OACTIVE;
                        ifq_prepend(&ifp->if_snd, m_head);
                        break;
                }

                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, m_head);

                /* Set watchdog on */
                txr->watchdog_check = TRUE;
        }
}

/*
 * Legacy TX driver routine, called from the
 * stack.  It always uses tx[0] and spins for it.
 * Should not be used with multiqueue tx.
 */
static void
igb_start(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;

        if (ifp->if_flags & IFF_RUNNING) {
                IGB_TX_LOCK(txr);
                igb_start_locked(txr, ifp);
                IGB_TX_UNLOCK(txr);
        }
        return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr;
        int             i = 0, err = 0;

        /* Which queue to use */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        txr = &adapter->tx_rings[i];

        if (IGB_TX_TRYLOCK(txr)) {
                err = igb_mq_start_locked(ifp, txr, m);
                IGB_TX_UNLOCK(txr);
        } else
                err = drbr_enqueue(ifp, txr->br, m);

        return (err);
}
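
/*
 * Example (illustrative): the flowid-to-ring mapping above is a plain
 * modulo, so on a 4-queue adapter a packet whose m_pkthdr.flowid is 13
 * lands on tx_rings[1] (13 % 4); packets without M_FLOWID always use
 * ring 0.
 */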

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             err = 0, enq;

        IGB_TX_LOCK_ASSERT(txr);

        if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) !=
            IFF_RUNNING || adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enq = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;
        /* Process the queue */
        while (next != NULL) {
                if ((err = igb_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enq++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_flags & IFF_RUNNING) == 0)
                        break;
                if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
                        ifp->if_flags |= IFF_OACTIVE;
                        break;
                }
                next = drbr_dequeue(ifp, txr->br);
        }
        if (enq > 0) {
                /* Set the watchdog */
                txr->watchdog_check = TRUE;
        }
        return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IGB_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IGB_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cred)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
        struct ifaddr *ifa = (struct ifaddr *)data;
#endif
        int error = 0;

        if (adapter->in_detach)
                return (error);

        switch (command) {
        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        /*
                         * XXX
                         * Since resetting hardware takes a very long time
                         * and results in link renegotiation, we only
                         * initialize the hardware when it is absolutely
                         * required.
                         */
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_flags & IFF_RUNNING)) {
                                IGB_CORE_LOCK(adapter);
                                igb_init_locked(adapter);
                                IGB_CORE_UNLOCK(adapter);
                        }
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
                } else
#endif
                        error = ether_ioctl(ifp, command, data);
                break;
        case SIOCSIFMTU:
            {
                int max_frame_size;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

                IGB_CORE_LOCK(adapter);
                max_frame_size = 9234;
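                /*
                 * Worked example (illustrative): 9234 bytes on the wire
                 * minus the 14-byte Ethernet header and 4-byte CRC caps
                 * the configurable MTU at 9216 bytes.
                 */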
                if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
                    ETHER_CRC_LEN) {
                        IGB_CORE_UNLOCK(adapter);
                        error = EINVAL;
                        break;
                }

                ifp->if_mtu = ifr->ifr_mtu;
                adapter->max_frame_size =
                    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                igb_init_locked(adapter);
                IGB_CORE_UNLOCK(adapter);
                break;
            }
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
                IGB_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_flags & IFF_RUNNING)) {
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        igb_disable_promisc(adapter);
                                        igb_set_promisc(adapter);
                                }
                        } else
                                igb_init_locked(adapter);
                } else
                        if (ifp->if_flags & IFF_RUNNING)
                                igb_stop(adapter);
                adapter->if_flags = ifp->if_flags;
                IGB_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
                if (ifp->if_flags & IFF_RUNNING) {
                        IGB_CORE_LOCK(adapter);
                        igb_disable_intr(adapter);
                        igb_set_multi(adapter);
#ifdef DEVICE_POLLING
                        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                                igb_enable_intr(adapter);
                        IGB_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
                /* Check SOL/IDER usage */
                IGB_CORE_LOCK(adapter);
                if (e1000_check_reset_block(&adapter->hw)) {
                        IGB_CORE_UNLOCK(adapter);
                        device_printf(adapter->dev, "Media change is"
                            " blocked due to SOL/IDER session.\n");
                        break;
                }
                IGB_CORE_UNLOCK(adapter);
                /* FALLTHROUGH */
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
            {
                int mask, reinit;

                IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
                reinit = 0;
                mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
                if (mask & IFCAP_POLLING) {
                        if (ifr->ifr_reqcap & IFCAP_POLLING) {
                                error = ether_poll_register(igb_poll, ifp);
                                if (error)
                                        return (error);
                                IGB_CORE_LOCK(adapter);
                                igb_disable_intr(adapter);
                                ifp->if_capenable |= IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        } else {
                                error = ether_poll_deregister(ifp);
                                /* Enable interrupt even in error case */
                                IGB_CORE_LOCK(adapter);
                                igb_enable_intr(adapter);
                                ifp->if_capenable &= ~IFCAP_POLLING;
                                IGB_CORE_UNLOCK(adapter);
                        }
                }
#endif
                if (mask & IFCAP_HWCSUM) {
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                        reinit = 1;
                }
#ifdef NET_TSO
                if (mask & IFCAP_TSO4) {
                        ifp->if_capenable ^= IFCAP_TSO4;
                        reinit = 1;
                }
#endif
                if (mask & IFCAP_VLAN_HWTAGGING) {
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                        reinit = 1;
                }
#ifdef NET_LRO
                if (mask & IFCAP_LRO) {
                        ifp->if_capenable ^= IFCAP_LRO;
                        reinit = 1;
                }
#endif
                if (reinit && (ifp->if_flags & IFF_RUNNING))
                        igb_init(adapter);
#if 0
                VLAN_CAPABILITIES(ifp);
#endif
                break;
            }

        default:
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as an
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
        struct ifnet    *ifp = adapter->ifp;
        device_t        dev = adapter->dev;

        INIT_DEBUGOUT("igb_init: begin");

        IGB_CORE_LOCK_ASSERT(adapter);

        igb_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Get the latest mac address; user can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

        /* Put the address into the Receive Address Array */
        e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

        igb_reset(adapter);
        igb_update_link_status(adapter);

        E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

        /* Set hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
                if (adapter->hw.mac.type == e1000_82576)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

#ifdef NET_TSO
        if (ifp->if_capenable & IFCAP_TSO4)
                ifp->if_hwassist |= CSUM_TSO;
#endif

        /* Configure for OS presence */
        igb_init_manageability(adapter);

        /* Prepare transmit descriptors and buffers */
        igb_setup_transmit_structures(adapter);
        igb_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        igb_set_multi(adapter);

        /*
        ** Figure out the desired mbuf pool
        ** for doing jumbo/packetsplit
        */
        if (ifp->if_mtu > ETHERMTU)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else
                adapter->rx_mbuf_sz = MCLBYTES;

        /* Prepare receive descriptors and buffers */
        if (igb_setup_receive_structures(adapter)) {
                device_printf(dev, "Could not setup receive structures\n");
                return;
        }
        igb_initialize_receive_units(adapter);

        /* Don't lose promiscuous settings */
        igb_set_promisc(adapter);

        ifp->if_flags |= IFF_RUNNING;
        ifp->if_flags &= ~IFF_OACTIVE;

        callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
        e1000_clear_hw_cntrs_base_generic(&adapter->hw);

        if (adapter->msix > 1) /* Set up queue routing */
                igb_configure_queues(adapter);

        /* Set up VLAN tag offload and filter */
        igb_setup_vlan_hw_support(adapter);

        /* this clears any pending interrupts */
        E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
        /*
         * Only enable interrupts if we are not polling, make sure
         * they are off otherwise.
         */
        if (ifp->if_capenable & IFCAP_POLLING)
                igb_disable_intr(adapter);
        else
#endif /* DEVICE_POLLING */
        {
                igb_enable_intr(adapter);
                E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
        }

        /* Don't reset the phy next time init gets called */
        adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
        struct adapter *adapter = arg;

        IGB_CORE_LOCK(adapter);
        igb_init_locked(adapter);
        IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_rxtx(void *context, int pending)
{
        struct adapter  *adapter = context;
        struct tx_ring  *txr = adapter->tx_rings;
        struct rx_ring  *rxr = adapter->rx_rings;
        struct ifnet    *ifp;

        ifp = adapter->ifp;

        if (ifp->if_flags & IFF_RUNNING) {
                if (igb_rxeof(rxr, adapter->rx_process_limit))
                        taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
                IGB_TX_LOCK(txr);
                igb_txeof(txr);

#if __FreeBSD_version >= 800000
                if (!drbr_empty(ifp, txr->br))
                        igb_mq_start_locked(ifp, txr, NULL);
#else
                if (!ifq_is_empty(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
        }

        igb_enable_intr(adapter);
}

static void
igb_handle_que(void *context, int pending)
{
        struct igb_queue *que = context;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct rx_ring *rxr = que->rxr;
        struct ifnet    *ifp = adapter->ifp;
        u32             loop = IGB_MAX_LOOP;
        bool            more;

        /* RX first */
        do {
                more = igb_rxeof(rxr, -1);
        } while (loop-- && more);

        if (IGB_TX_TRYLOCK(txr)) {
                loop = IGB_MAX_LOOP;
                do {
                        more = igb_txeof(txr);
                } while (loop-- && more);
#if __FreeBSD_version >= 800000
                igb_mq_start_locked(ifp, txr, NULL);
#else
                if (!ifq_is_empty(&ifp->if_snd))
                        igb_start_locked(txr, ifp);
#endif
                IGB_TX_UNLOCK(txr);
        }

        /* Reenable this interrupt */
#ifdef DEVICE_POLLING
        if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
                E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
        struct adapter *adapter = context;

        adapter->hw.mac.get_link_status = 1;
        igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
#define FILTER_STRAY
#define FILTER_HANDLED
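/*
 * Note: on FreeBSD these would be the filter-interrupt return values;
 * here they are defined to nothing, so the "return FILTER_*" statements
 * below compile to plain returns in this void handler.
 */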
static void
igb_irq_fast(void *arg)
{
        struct adapter  *adapter = arg;
        uint32_t        reg_icr;

        reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

        /* Hot eject?  */
        if (reg_icr == 0xffffffff)
                return FILTER_STRAY;

        /* Definitely not our interrupt.  */
        if (reg_icr == 0x0)
                return FILTER_STRAY;

        if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
                return FILTER_STRAY;

        /*
         * Mask interrupts until the taskqueue is finished running.  This is
         * cheap, just assume that it is needed.  This also works around the
         * MSI message reordering errata on certain systems.
         */
        igb_disable_intr(adapter);
        taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

        /* Link status change */
        if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
                taskqueue_enqueue(adapter->tq, &adapter->link_task);

        if (reg_icr & E1000_ICR_RXO)
                adapter->rx_overruns++;
        return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
        struct adapter *adapter = ifp->if_softc;
        struct rx_ring  *rxr = adapter->rx_rings;
        struct tx_ring  *txr = adapter->tx_rings;
        u32             reg_icr, rx_done = 0;
        u32             loop = IGB_MAX_LOOP;
        bool            more;

        IGB_CORE_LOCK(adapter);
        if ((ifp->if_flags & IFF_RUNNING) == 0) {
                IGB_CORE_UNLOCK(adapter);
                return POLL_RETURN_COUNT(rx_done);
        }

        if (cmd == POLL_AND_CHECK_STATUS) {
                reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
                /* Link status change */
                if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
                        taskqueue_enqueue(adapter->tq, &adapter->link_task);

                if (reg_icr & E1000_ICR_RXO)
                        adapter->rx_overruns++;
        }
        IGB_CORE_UNLOCK(adapter);

        /* TODO: rx_count */
        rx_done = igb_rxeof(rxr, count) ? 1 : 0;

        IGB_TX_LOCK(txr);
        do {
                more = igb_txeof(txr);
        } while (loop-- && more);
#if __FreeBSD_version >= 800000
        if (!drbr_empty(ifp, txr->br))
                igb_mq_start_locked(ifp, txr, NULL);
#else
        if (!ifq_is_empty(&ifp->if_snd))
                igb_start_locked(txr, ifp);
#endif
        IGB_TX_UNLOCK(txr);
        return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX Queue Interrupt Service routine (TX and RX cleanup)
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
        struct igb_queue *que = arg;
        struct adapter *adapter = que->adapter;
        struct tx_ring *txr = que->txr;
        struct rx_ring *rxr = que->rxr;
        u32             newitr = 0;
        bool            more_tx, more_rx;

        E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
        ++que->irqs;

        IGB_TX_LOCK(txr);
        more_tx = igb_txeof(txr);
        IGB_TX_UNLOCK(txr);

        more_rx = igb_rxeof(rxr, adapter->rx_process_limit);

        if (igb_enable_aim == FALSE)
                goto no_calc;
        /*
        ** Do Adaptive Interrupt Moderation:
        **  - Write out the last calculated setting
        **  - Calculate based on the average size over
        **    the last interval.
        */
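        /*
         * Worked example (illustrative): a queue that moved 1,500,000
         * bytes in 1,000 packets averages 1500 bytes/packet; adding 24
         * for preamble/CRC gives 1524, which is above the 300-1200 mid
         * range, so the value written back is 1524 / 2 = 762 (before the
         * low bits are masked off below).
         */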
        if (que->eitr_setting)
                E1000_WRITE_REG(&adapter->hw,
                    E1000_EITR(que->msix), que->eitr_setting);

        que->eitr_setting = 0;

        /* Idle, do nothing */
        if ((txr->bytes == 0) && (rxr->bytes == 0))
                goto no_calc;

        /* Use half the default if below gigabit speed */
        if (adapter->link_speed != 1000)
                newitr = IGB_DEFAULT_ITR / 2;
        else {
                if ((txr->bytes) && (txr->packets))
                        newitr = txr->bytes/txr->packets;
                if ((rxr->bytes) && (rxr->packets))
                        newitr = max(newitr,
                            (rxr->bytes / rxr->packets));
                newitr += 24; /* account for hardware frame, crc */
                /* set an upper boundary */
                newitr = min(newitr, 3000);
                /* Be nice to the mid range */
                if ((newitr > 300) && (newitr < 1200))
                        newitr = (newitr / 3);
                else
                        newitr = (newitr / 2);
        }
        newitr &= 0x7FFC;  /* Mask invalid bits */
        if (adapter->hw.mac.type == e1000_82575)
                newitr |= newitr << 16;
        else
                newitr |= 0x8000000;

        /* save for next interrupt */
        que->eitr_setting = newitr;

        /* Reset state */
        txr->bytes = 0;
        txr->packets = 0;
        rxr->bytes = 0;
        rxr->packets = 0;

no_calc:
        /* Schedule a clean task if needed */
        if (more_tx || more_rx)
                taskqueue_enqueue(que->tq, &que->que_task);
        else
                /* Reenable this interrupt */
                E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
        return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
        struct adapter  *adapter = arg;
        u32             icr;

        ++adapter->link_irq;
        icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
        if (!(icr & E1000_ICR_LSC))
                goto spurious;
        taskqueue_enqueue(adapter->tq, &adapter->link_task);

spurious:
        /* Rearm */
        E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
        E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
        return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
        struct adapter *adapter = ifp->if_softc;
        u_char fiber_type = IFM_1000_SX;

        INIT_DEBUGOUT("igb_media_status: begin");

        IGB_CORE_LOCK(adapter);
        igb_update_link_status(adapter);

        ifmr->ifm_status = IFM_AVALID;
        ifmr->ifm_active = IFM_ETHER;

        if (!adapter->link_active) {
                IGB_CORE_UNLOCK(adapter);
                return;
        }

        ifmr->ifm_status |= IFM_ACTIVE;

        if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
            (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
                ifmr->ifm_active |= fiber_type | IFM_FDX;
        else {
                switch (adapter->link_speed) {
                case 10:
                        ifmr->ifm_active |= IFM_10_T;
                        break;
                case 100:
                        ifmr->ifm_active |= IFM_100_TX;
                        break;
                case 1000:
                        ifmr->ifm_active |= IFM_1000_T;
                        break;
                }
                if (adapter->link_duplex == FULL_DUPLEX)
                        ifmr->ifm_active |= IFM_FDX;
                else
                        ifmr->ifm_active |= IFM_HDX;
        }
        IGB_CORE_UNLOCK(adapter);
}
1555
1556 /*********************************************************************
1557  *
1558  *  Media Ioctl callback
1559  *
1560  *  This routine is called when the user changes speed/duplex using
1561  *  the media/mediaopt options with ifconfig.
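 *
 *  For example (a sketch; "igb0" is a placeholder unit name):
 *      ifconfig igb0 media 100baseTX mediaopt full-duplex
 *  selects the IFM_100_TX | IFM_FDX case handled below.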
1562  *
1563  **********************************************************************/
1564 static int
1565 igb_media_change(struct ifnet *ifp)
1566 {
1567         struct adapter *adapter = ifp->if_softc;
1568         struct ifmedia  *ifm = &adapter->media;
1569
1570         INIT_DEBUGOUT("igb_media_change: begin");
1571
1572         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1573                 return (EINVAL);
1574
1575         IGB_CORE_LOCK(adapter);
1576         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1577         case IFM_AUTO:
1578                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1579                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1580                 break;
1581         case IFM_1000_LX:
1582         case IFM_1000_SX:
1583         case IFM_1000_T:
1584                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1585                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1586                 break;
1587         case IFM_100_TX:
1588                 adapter->hw.mac.autoneg = FALSE;
1589                 adapter->hw.phy.autoneg_advertised = 0;
1590                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1591                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1592                 else
1593                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1594                 break;
1595         case IFM_10_T:
1596                 adapter->hw.mac.autoneg = FALSE;
1597                 adapter->hw.phy.autoneg_advertised = 0;
1598                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1599                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1600                 else
1601                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1602                 break;
1603         default:
1604                 device_printf(adapter->dev, "Unsupported media type\n");
1605         }
1606
1607         /* As the speed/duplex settings may have changed, we need to
1608          * reset the PHY.
1609          */
1610         adapter->hw.phy.reset_disable = FALSE;
1611
1612         igb_init_locked(adapter);
1613         IGB_CORE_UNLOCK(adapter);
1614
1615         return (0);
1616 }
1617
1618
1619 /*********************************************************************
1620  *
1621  *  This routine maps the mbufs to Advanced TX descriptors,
1622  *  as used by the 82575 class of adapters.
1623  *  
1624  **********************************************************************/
1625
1626 static int
1627 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1628 {
1629         struct adapter          *adapter = txr->adapter;
1630         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1631         bus_dmamap_t            map;
1632         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1633         union e1000_adv_tx_desc *txd = NULL;
1634         struct mbuf             *m_head;
1635         u32                     olinfo_status = 0, cmd_type_len = 0;
1636         int                     nsegs, i, j, error, first, last = 0;
1637         u32                     hdrlen = 0;
1638
1639         m_head = *m_headp;
1640
1641
1642         /* Set basic descriptor constants */
1643         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1644         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1645         if (m_head->m_flags & M_VLANTAG)
1646                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1647
1648         /*
1649          * Force a cleanup if number of TX descriptors
1650          * available hits the threshold
1651          */
1652         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1653                 igb_txeof(txr);
1654                 /* Do we now have at least a minimal number free? */
1655                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1656                         txr->no_desc_avail++;
1657                         return (ENOBUFS);
1658                 }
1659         }
1660
1661         /*
1662          * Map the packet for DMA.
1663          *
1664          * Capture the index of the first descriptor:
1665          * that slot will record the index of the EOP
1666          * descriptor, which is the only one that now
1667          * gets a DONE-bit writeback.
1668          */
1669         first = txr->next_avail_desc;
1670         tx_buffer = &txr->tx_buffers[first];
1671         tx_buffer_mapped = tx_buffer;
1672         map = tx_buffer->map;
1673
1674         error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1675             *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1676
1677         if (error == EFBIG) {
1678                 struct mbuf *m;
1679
1680                 m = m_defrag(*m_headp, MB_DONTWAIT);
1681                 if (m == NULL) {
1682                         adapter->mbuf_defrag_failed++;
1683                         m_freem(*m_headp);
1684                         *m_headp = NULL;
1685                         return (ENOBUFS);
1686                 }
1687                 *m_headp = m;
1688
1689                 /* Try it again */
1690                 error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1691                     *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1692
1693                 if (error == ENOMEM) {
1694                         adapter->no_tx_dma_setup++;
1695                         return (error);
1696                 } else if (error != 0) {
1697                         adapter->no_tx_dma_setup++;
1698                         m_freem(*m_headp);
1699                         *m_headp = NULL;
1700                         return (error);
1701                 }
1702         } else if (error == ENOMEM) {
1703                 adapter->no_tx_dma_setup++;
1704                 return (error);
1705         } else if (error != 0) {
1706                 adapter->no_tx_dma_setup++;
1707                 m_freem(*m_headp);
1708                 *m_headp = NULL;
1709                 return (error);
1710         }
1711
1712         /* Check again to be sure we have enough descriptors */
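        /*
         * (The extra 2 below presumably leaves headroom for a context
         * descriptor and keeps the ring from being run completely
         * full -- an inference from the code, not a documented rule.)
         */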
1713         if (nsegs > (txr->tx_avail - 2)) {
1714                 txr->no_desc_avail++;
1715                 bus_dmamap_unload(txr->txtag, map);
1716                 return (ENOBUFS);
1717         }
1718         m_head = *m_headp;
1719
1720         /*
1721          * Set up the context descriptor:
1722          * used when any hardware offload is done.
1723          * This includes CSUM, VLAN, and TSO. It
1724          * will use the first descriptor.
1725          */
1726 #ifdef NET_TSO
1727         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1728                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1729                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1730                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1731                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1732                 } else
1733                         return (ENXIO); 
1734         } else
1735 #endif
1736         if (igb_tx_ctx_setup(txr, m_head))
1737                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1738
1739         /* Calculate payload length */
1740         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1741             << E1000_ADVTXD_PAYLEN_SHIFT);
1742
1743         /* 82575 needs the queue index added */
1744         if (adapter->hw.mac.type == e1000_82575)
1745                 olinfo_status |= txr->me << 4;
1746
1747         /* Set up our transmit descriptors */
1748         i = txr->next_avail_desc;
1749         for (j = 0; j < nsegs; j++) {
1750                 bus_size_t seg_len;
1751                 bus_addr_t seg_addr;
1752
1753                 tx_buffer = &txr->tx_buffers[i];
1754                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1755                 seg_addr = segs[j].ds_addr;
1756                 seg_len  = segs[j].ds_len;
1757
1758                 txd->read.buffer_addr = htole64(seg_addr);
1759                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1760                 txd->read.olinfo_status = htole32(olinfo_status);
1761                 last = i;
1762                 if (++i == adapter->num_tx_desc)
1763                         i = 0;
1764                 tx_buffer->m_head = NULL;
1765                 tx_buffer->next_eop = -1;
1766         }
1767
1768         txr->next_avail_desc = i;
1769         txr->tx_avail -= nsegs;
1770
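        /*
         * The dmamap that was actually loaded belongs to the slot at
         * "first"; hand it to the EOP slot (whose spare map goes back
         * to "first") so that the eventual unload during TX cleanup in
         * igb_txeof() happens on the slot that carries the mbuf and
         * sees the DONE writeback.
         */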
1771         tx_buffer->m_head = m_head;
1772         tx_buffer_mapped->map = tx_buffer->map;
1773         tx_buffer->map = map;
1774         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1775
1776         /*
1777          * Last Descriptor of Packet
1778          * needs End Of Packet (EOP)
1779          * and Report Status (RS)
1780          */
1781         txd->read.cmd_type_len |=
1782             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1783         /*
1784          * Keep track in the first buffer which
1785          * descriptor will be written back
1786          */
1787         tx_buffer = &txr->tx_buffers[first];
1788         tx_buffer->next_eop = last;
1789         txr->watchdog_time = ticks;
1790
1791         /*
1792          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1793          * that this frame is available to transmit.
1794          */
1795         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1796             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1797         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1798         ++txr->tx_packets;
1799
1800         return (0);
1801
1802 }
1803
1804 static void
1805 igb_set_promisc(struct adapter *adapter)
1806 {
1807         struct ifnet    *ifp = adapter->ifp;
1808         uint32_t        reg_rctl;
1809
1810         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1811
1812         if (ifp->if_flags & IFF_PROMISC) {
1813                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1814                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1815         } else if (ifp->if_flags & IFF_ALLMULTI) {
1816                 reg_rctl |= E1000_RCTL_MPE;
1817                 reg_rctl &= ~E1000_RCTL_UPE;
1818                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1819         }
1820 }
1821
1822 static void
1823 igb_disable_promisc(struct adapter *adapter)
1824 {
1825         uint32_t        reg_rctl;
1826
1827         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1828
1829         reg_rctl &=  (~E1000_RCTL_UPE);
1830         reg_rctl &=  (~E1000_RCTL_MPE);
1831         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1832 }
1833
1834
1835 /*********************************************************************
1836  *  Multicast Update
1837  *
1838  *  This routine is called whenever multicast address list is updated.
1839  *
1840  **********************************************************************/
1841
1842 static void
1843 igb_set_multi(struct adapter *adapter)
1844 {
1845         struct ifnet    *ifp = adapter->ifp;
1846         struct ifmultiaddr *ifma;
1847         u32 reg_rctl = 0;
1848         u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1849
1850         int mcnt = 0;
1851
1852         IOCTL_DEBUGOUT("igb_set_multi: begin");
1853
1854 #if 0
1855 #if __FreeBSD_version < 800000
1856         IF_ADDR_LOCK(ifp);
1857 #else
1858         if_maddr_rlock(ifp);
1859 #endif
1860 #endif
1861
1862         LIST_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1863                 if (ifma->ifma_addr->sa_family != AF_LINK)
1864                         continue;
1865
1866                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1867                         break;
1868
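                /*
                 * Pack each 6-byte address back-to-back: entry mcnt
                 * lives at &mta[mcnt * ETH_ADDR_LEN], since
                 * e1000_update_mc_addr_list() takes a flat array.
                 */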
1869                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1870                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1871                 mcnt++;
1872         }
1873 #if 0
1874 #if __FreeBSD_version < 800000
1875         IF_ADDR_UNLOCK(ifp);
1876 #else
1877         if_maddr_runlock(ifp);
1878 #endif
1879 #endif
1880
1881         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1882                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1883                 reg_rctl |= E1000_RCTL_MPE;
1884                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1885         } else
1886                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1887 }
1888
1889
1890 /*********************************************************************
1891  *  Timer routine:
1892  *      This routine checks for link status,
1893  *      updates statistics, and does the watchdog.
1894  *
1895  **********************************************************************/
1896
1897 static void
1898 igb_local_timer(void *arg)
1899 {
1900         struct adapter          *adapter = arg;
1901         struct ifnet            *ifp = adapter->ifp;
1902         device_t                dev = adapter->dev;
1903         struct tx_ring          *txr = adapter->tx_rings;
1904
1905         IGB_CORE_LOCK(adapter);
1906         IGB_CORE_LOCK_ASSERT(adapter);
1907
1911         igb_update_link_status(adapter);
1912         igb_update_stats_counters(adapter);
1913
1914         if (igb_display_debug_stats && ifp->if_flags & IFF_RUNNING)
1915                 igb_print_hw_stats(adapter);
1916
1917         /*
1918         ** Watchdog: check for time since any descriptor was cleaned
1919         */
1920         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1921                 if (txr->watchdog_check == FALSE)
1922                         continue;
1923                 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1924                         goto timeout;
1925         }
1926
1927         /* Trigger an RX interrupt on all queues */
1928 #ifdef DEVICE_POLLING
1929         if (!(ifp->if_capenable & IFCAP_POLLING))
1930 #endif
1931         E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1932         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1933         IGB_CORE_UNLOCK(adapter);
1934         return;
1935
1936 timeout:
1937         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1938         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1939             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1940             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1941         device_printf(dev,"TX(%d) desc avail = %d,"
1942             "Next TX to Clean = %d\n",
1943             txr->me, txr->tx_avail, txr->next_to_clean);
1944         adapter->ifp->if_flags &= ~IFF_RUNNING;
1945         adapter->watchdog_events++;
1946         igb_init_locked(adapter);
1947         IGB_CORE_UNLOCK(adapter);
1948 }
1949
1950 static void
1951 igb_update_link_status(struct adapter *adapter)
1952 {
1953         struct e1000_hw *hw = &adapter->hw;
1954         struct ifnet *ifp = adapter->ifp;
1955         device_t dev = adapter->dev;
1956         struct tx_ring *txr = adapter->tx_rings;
1957         u32 link_check = 0;
1958
1959         /* Get the cached link value or read for real */
1960         switch (hw->phy.media_type) {
1961         case e1000_media_type_copper:
1962                 if (hw->mac.get_link_status) {
1963                         /* Do the work to read phy */
1964                         e1000_check_for_link(hw);
1965                         link_check = !hw->mac.get_link_status;
1966                 } else
1967                         link_check = TRUE;
1968                 break;
1969         case e1000_media_type_fiber:
1970                 e1000_check_for_link(hw);
1971                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1972                                  E1000_STATUS_LU);
1973                 break;
1974         case e1000_media_type_internal_serdes:
1975                 e1000_check_for_link(hw);
1976                 link_check = adapter->hw.mac.serdes_has_link;
1977                 break;
1978         default:
1979         case e1000_media_type_unknown:
1980                 break;
1981         }
1982
1983         /* Now we check if a transition has happened */
1984         if (link_check && (adapter->link_active == 0)) {
1985                 e1000_get_speed_and_duplex(&adapter->hw, 
1986                     &adapter->link_speed, &adapter->link_duplex);
1987                 if (bootverbose)
1988                         device_printf(dev, "Link is up %d Mbps %s\n",
1989                             adapter->link_speed,
1990                             ((adapter->link_duplex == FULL_DUPLEX) ?
1991                             "Full Duplex" : "Half Duplex"));
1992                 adapter->link_active = 1;
1993                 ifp->if_baudrate = adapter->link_speed * 1000000;
1994                 ifp->if_link_state = LINK_STATE_UP;
1995                 if_link_state_change(ifp);
1996         } else if (!link_check && (adapter->link_active == 1)) {
1997                 ifp->if_baudrate = adapter->link_speed = 0;
1998                 adapter->link_duplex = 0;
1999                 if (bootverbose)
2000                         device_printf(dev, "Link is Down\n");
2001                 adapter->link_active = 0;
2002                 ifp->if_link_state = LINK_STATE_DOWN;
2003                 if_link_state_change(ifp);
2004                 /* Turn off watchdogs */
2005                 for (int i = 0; i < adapter->num_queues; i++, txr++)
2006                         txr->watchdog_check = FALSE;
2007         }
2008 }
2009
2010 /*********************************************************************
2011  *
2012  *  This routine disables all traffic on the adapter by issuing a
2013  *  global reset on the MAC and deallocates TX/RX buffers.
2014  *
2015  **********************************************************************/
2016
2017 static void
2018 igb_stop(void *arg)
2019 {
2020         struct adapter  *adapter = arg;
2021         struct ifnet    *ifp = adapter->ifp;
2022         struct tx_ring *txr = adapter->tx_rings;
2023
2024         IGB_CORE_LOCK_ASSERT(adapter);
2025
2026         INIT_DEBUGOUT("igb_stop: begin");
2027
2028         igb_disable_intr(adapter);
2029
2030         callout_stop(&adapter->timer);
2031
2032         /* Tell the stack that the interface is no longer active */
2033         ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2034
2035         /* Unarm watchdog timer. */
2036         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2037                 IGB_TX_LOCK(txr);
2038                 txr->watchdog_check = FALSE;
2039                 IGB_TX_UNLOCK(txr);
2040         }
2041
2042         e1000_reset_hw(&adapter->hw);
2043         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2044 }
2045
2046
2047 /*********************************************************************
2048  *
2049  *  Determine hardware revision.
2050  *
2051  **********************************************************************/
2052 static void
2053 igb_identify_hardware(struct adapter *adapter)
2054 {
2055         device_t dev = adapter->dev;
2056
2057         /* Make sure our PCI config space has the necessary stuff set */
2058         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2059         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2060             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2061                 device_printf(dev, "Memory Access and/or Bus Master bits "
2062                     "were not set!\n");
2063                 adapter->hw.bus.pci_cmd_word |=
2064                     (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2065                 pci_write_config(dev, PCIR_COMMAND,
2066                     adapter->hw.bus.pci_cmd_word, 2);
2067         }
2068
2069         /* Save off the information about this board */
2070         adapter->hw.vendor_id = pci_get_vendor(dev);
2071         adapter->hw.device_id = pci_get_device(dev);
2072         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2073         adapter->hw.subsystem_vendor_id =
2074             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2075         adapter->hw.subsystem_device_id =
2076             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2077
2078         /* Do Shared Code Init and Setup */
2079         if (e1000_set_mac_type(&adapter->hw)) {
2080                 device_printf(dev, "Setup init failure\n");
2081                 return;
2082         }
2083 }
2084
2085 static int
2086 igb_allocate_pci_resources(struct adapter *adapter)
2087 {
2088         device_t        dev = adapter->dev;
2089         int             rid;
2090
2091         rid = PCIR_BAR(0);
2092         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2093             &rid, RF_ACTIVE);
2094         if (adapter->pci_mem == NULL) {
2095                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2096                 return (ENXIO);
2097         }
2098         adapter->osdep.mem_bus_space_tag =
2099             rman_get_bustag(adapter->pci_mem);
2100         adapter->osdep.mem_bus_space_handle =
2101             rman_get_bushandle(adapter->pci_mem);
2102         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2103
2104         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2105
2106         /* This will setup either MSI/X or MSI */
2107         adapter->msix = igb_setup_msix(adapter);
2108         adapter->hw.back = &adapter->osdep;
2109
2110         return (0);
2111 }
2112
2113 /*********************************************************************
2114  *
2115  *  Setup the Legacy or MSI Interrupt handler
2116  *
2117  **********************************************************************/
2118 static int
2119 igb_allocate_legacy(struct adapter *adapter)
2120 {
2121         device_t dev = adapter->dev;
2122         int error, rid = 0;
2123
2124         /* Turn off all interrupts */
2125         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2126
2127         /* MSI RID is 1 */
2128         if (adapter->msix == 1)
2129                 rid = 1;
2130
2131         /* We allocate a single interrupt resource */
2132         adapter->res = bus_alloc_resource_any(dev,
2133             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2134         if (adapter->res == NULL) {
2135                 device_printf(dev, "Unable to allocate bus resource: "
2136                     "interrupt\n");
2137                 return (ENXIO);
2138         }
2139
2140         /*
2141          * Try allocating a fast interrupt and the associated deferred
2142          * processing contexts.
2143          */
2144         TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2145         /* Make tasklet for deferred link handling */
2146         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2147         adapter->tq = taskqueue_create("igb_taskq", M_NOWAIT,
2148             taskqueue_thread_enqueue, &adapter->tq);
2149         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s taskq",
2150             device_get_nameunit(adapter->dev));
2151         if ((error = bus_setup_intr(dev, adapter->res,
2152             /*INTR_TYPE_NET |*/ INTR_MPSAFE, igb_irq_fast,
2153             adapter, &adapter->tag, NULL)) != 0) {
2154                 device_printf(dev, "Failed to register fast interrupt "
2155                             "handler: %d\n", error);
2156                 taskqueue_free(adapter->tq);
2157                 adapter->tq = NULL;
2158                 return (error);
2159         }
2160
2161         return (0);
2162 }
2163
2164
2165 /*********************************************************************
2166  *
2167  *  Setup the MSIX Queue Interrupt handlers: 
2168  *
2169  **********************************************************************/
2170 static int
2171 igb_allocate_msix(struct adapter *adapter)
2172 {
2173         device_t                dev = adapter->dev;
2174         struct igb_queue        *que = adapter->queues;
2175         int                     error, rid, vector = 0;
2176
2177
2178         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2179                 rid = vector + 1;
2180                 que->res = bus_alloc_resource_any(dev,
2181                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2182                 if (que->res == NULL) {
2183                         device_printf(dev,
2184                             "Unable to allocate bus resource: "
2185                             "MSIX Queue Interrupt\n");
2186                         return (ENXIO);
2187                 }
2188                 error = bus_setup_intr(dev, que->res,
2189                     /*INTR_TYPE_NET |*/ INTR_MPSAFE, 
2190                     igb_msix_que, que, &que->tag, NULL);
2191                 if (error) {
2192                         que->res = NULL;
2193                         device_printf(dev, "Failed to register Queue handler");
2194                         return (error);
2195                 }
2196                 que->msix = vector;
2197                 if (adapter->hw.mac.type == e1000_82575)
2198                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2199                 else
2200                         que->eims = 1 << vector;
2201                 /*
2202                 ** Bind the msix vector, and thus the
2203                 ** rings to the corresponding cpu.
2204                 */
2205 #if 0
2206                 if (adapter->num_queues > 1)
2207                         bus_bind_intr(dev, que->res, i);
2208 #endif
2209                 /* Make tasklet for deferred handling */
2210                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2211                 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2212                     taskqueue_thread_enqueue, &que->tq);
2213                 taskqueue_start_threads(&que->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s que",
2214                     device_get_nameunit(adapter->dev));
2215         }
2216
2217         /* And Link */
2218         rid = vector + 1;
2219         adapter->res = bus_alloc_resource_any(dev,
2220             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2221         if (adapter->res == NULL) {
2222                 device_printf(dev,
2223                     "Unable to allocate bus resource: "
2224                     "MSIX Link Interrupt\n");
2225                 return (ENXIO);
2226         }
2227         if ((error = bus_setup_intr(dev, adapter->res,
2228             /*INTR_TYPE_NET |*/ INTR_MPSAFE,
2229             igb_msix_link, adapter, &adapter->tag, NULL)) != 0) {
2230                 device_printf(dev, "Failed to register Link handler");
2231                 return (error);
2232         }
2233         adapter->linkvec = vector;
2234
2235         /* Make tasklet for deferred handling */
2236         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2237         adapter->tq = taskqueue_create("igb_link", M_NOWAIT,
2238             taskqueue_thread_enqueue, &adapter->tq);
2239         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s link",
2240             device_get_nameunit(adapter->dev));
2241
2242         return (0);
2243 }
2244
2245
2246 static void
2247 igb_configure_queues(struct adapter *adapter)
2248 {
2249         struct  e1000_hw        *hw = &adapter->hw;
2250         struct  igb_queue       *que;
2251         u32                     tmp, ivar = 0;
2252         u32                     newitr = IGB_DEFAULT_ITR;
2253
2254         /* First turn on RSS capability */
2255         if (adapter->hw.mac.type > e1000_82575)
2256                 E1000_WRITE_REG(hw, E1000_GPIE,
2257                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2258                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2259
2260         /* Turn on MSIX */
2261         switch (adapter->hw.mac.type) {
2262         case e1000_82580:
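                /*
                 * IVAR layout sketch, inferred from the masks below:
                 * each 32-bit IVAR register carries four 8-bit entries,
                 *   byte 0: RX queue 2*index    byte 1: TX queue 2*index
                 *   byte 2: RX queue 2*index+1  byte 3: TX queue 2*index+1
                 * with E1000_IVAR_VALID flagging a live entry.
                 */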
2263                 /* RX entries */
2264                 for (int i = 0; i < adapter->num_queues; i++) {
2265                         u32 index = i >> 1;
2266                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2267                         que = &adapter->queues[i];
2268                         if (i & 1) {
2269                                 ivar &= 0xFF00FFFF;
2270                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2271                         } else {
2272                                 ivar &= 0xFFFFFF00;
2273                                 ivar |= que->msix | E1000_IVAR_VALID;
2274                         }
2275                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2276                 }
2277                 /* TX entries */
2278                 for (int i = 0; i < adapter->num_queues; i++) {
2279                         u32 index = i >> 1;
2280                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2281                         que = &adapter->queues[i];
2282                         if (i & 1) {
2283                                 ivar &= 0x00FFFFFF;
2284                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2285                         } else {
2286                                 ivar &= 0xFFFF00FF;
2287                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2288                         }
2289                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2290                         adapter->eims_mask |= que->eims;
2291                 }
2292
2293                 /* And for the link interrupt */
2294                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2295                 adapter->link_mask = 1 << adapter->linkvec;
2296                 adapter->eims_mask |= adapter->link_mask;
2297                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2298                 break;
2299         case e1000_82576:
2300                 /* RX entries */
2301                 for (int i = 0; i < adapter->num_queues; i++) {
2302                         u32 index = i & 0x7; /* Each IVAR has two entries */
2303                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2304                         que = &adapter->queues[i];
2305                         if (i < 8) {
2306                                 ivar &= 0xFFFFFF00;
2307                                 ivar |= que->msix | E1000_IVAR_VALID;
2308                         } else {
2309                                 ivar &= 0xFF00FFFF;
2310                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2311                         }
2312                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2313                         adapter->eims_mask |= que->eims;
2314                 }
2315                 /* TX entries */
2316                 for (int i = 0; i < adapter->num_queues; i++) {
2317                         u32 index = i & 0x7; /* Each IVAR has two entries */
2318                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2319                         que = &adapter->queues[i];
2320                         if (i < 8) {
2321                                 ivar &= 0xFFFF00FF;
2322                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2323                         } else {
2324                                 ivar &= 0x00FFFFFF;
2325                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2326                         }
2327                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2328                         adapter->eims_mask |= que->eims;
2329                 }
2330
2331                 /* And for the link interrupt */
2332                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2333                 adapter->link_mask = 1 << adapter->linkvec;
2334                 adapter->eims_mask |= adapter->link_mask;
2335                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2336                 break;
2337
2338         case e1000_82575:
2339                 /* enable MSI-X support*/
2340                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2341                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2342                 /* Auto-Mask interrupts upon ICR read. */
2343                 tmp |= E1000_CTRL_EXT_EIAME;
2344                 tmp |= E1000_CTRL_EXT_IRCA;
2345                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2346
2347                 /* Queues */
2348                 for (int i = 0; i < adapter->num_queues; i++) {
2349                         que = &adapter->queues[i];
2350                         tmp = E1000_EICR_RX_QUEUE0 << i;
2351                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2352                         que->eims = tmp;
2353                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2354                             i, que->eims);
2355                         adapter->eims_mask |= que->eims;
2356                 }
2357
2358                 /* Link */
2359                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2360                     E1000_EIMS_OTHER);
2361                 adapter->link_mask |= E1000_EIMS_OTHER;
2362                 adapter->eims_mask |= adapter->link_mask;
2363         default:
2364                 break;
2365         }
2366
2367         /* Set the starting interrupt rate */
2368         if (hw->mac.type == e1000_82575)
2369                 newitr |= newitr << 16;
2370         else
2371                 newitr |= 0x8000000;
2372
2373         for (int i = 0; i < adapter->num_queues; i++) {
2374                 que = &adapter->queues[i];
2375                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2376         }
2377
2378         return;
2379 }
2380
2381
2382 static void
2383 igb_free_pci_resources(struct adapter *adapter)
2384 {
2385         struct          igb_queue *que = adapter->queues;
2386         device_t        dev = adapter->dev;
2387         int             rid;
2388
2389         /*
2390         ** There is a slight possibility of a failure mode
2391         ** in attach that will result in entering this function
2392         ** before interrupt resources have been initialized, and
2393 ** in that case we do not want to execute the loops below.
2394         ** We can detect this reliably by the state of the adapter
2395         ** res pointer.
2396         */
2397         if (adapter->res == NULL)
2398                 goto mem;
2399
2400         /*
2401          * First release all the interrupt resources:
2402          */
2403         for (int i = 0; i < adapter->num_queues; i++, que++) {
2404                 rid = que->msix + 1;
2405                 if (que->tag != NULL) {
2406                         bus_teardown_intr(dev, que->res, que->tag);
2407                         que->tag = NULL;
2408                 }
2409                 if (que->res != NULL)
2410                         bus_release_resource(dev,
2411                             SYS_RES_IRQ, rid, que->res);
2412         }
2413
2414         /* Clean the Legacy or Link interrupt last */
2415         if (adapter->linkvec) /* we are doing MSIX */
2416                 rid = adapter->linkvec + 1;
2417         else
2418                 rid = (adapter->msix != 0) ? 1 : 0;
2419
2420         if (adapter->tag != NULL) {
2421                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2422                 adapter->tag = NULL;
2423         }
2424         if (adapter->res != NULL)
2425                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2426
2427 mem:
2428         if (adapter->msix)
2429                 pci_release_msi(dev);
2430
2431         if (adapter->msix_mem != NULL)
2432                 bus_release_resource(dev, SYS_RES_MEMORY,
2433                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2434
2435         if (adapter->pci_mem != NULL)
2436                 bus_release_resource(dev, SYS_RES_MEMORY,
2437                     PCIR_BAR(0), adapter->pci_mem);
2438
2439 }
2440
2441 /*
2442  * Setup Either MSI/X or MSI
2443  */
2444 static int
2445 igb_setup_msix(struct adapter *adapter)
2446 {
2447         device_t dev = adapter->dev;
2448         int rid, want, queues, msgs;
2449
2450         /* tuneable override */
2451         if (igb_enable_msix == 0)
2452                 goto msi;
2453
2454         /* First try MSI/X */
2455         rid = PCIR_BAR(IGB_MSIX_BAR);
2456         adapter->msix_mem = bus_alloc_resource_any(dev,
2457             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2458         if (!adapter->msix_mem) {
2459                 /* May not be enabled */
2460                 device_printf(adapter->dev,
2461                     "Unable to map MSIX table \n");
2462                 goto msi;
2463         }
2464
2465         msgs = pci_msix_count(dev); 
2466         if (msgs == 0) { /* system has msix disabled */
2467                 bus_release_resource(dev, SYS_RES_MEMORY,
2468                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2469                 adapter->msix_mem = NULL;
2470                 goto msi;
2471         }
2472
2473         /* Figure out a reasonable auto config value */
2474         queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2475
2476         /* Can have max of 4 queues on 82575 */
2477         if (adapter->hw.mac.type == e1000_82575) {
2478                 if (queues > 4)
2479                         queues = 4;
2480                 if (igb_num_queues > 4)
2481                         igb_num_queues = 4;
2482         }
2483
2484         if (igb_num_queues == 0)
2485                 igb_num_queues = queues;
2486
2487         /*
2488         ** One vector (RX/TX pair) per queue
2489         ** plus an additional for Link interrupt
2490         */
2491         want = igb_num_queues + 1;
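        /* e.g. igb_num_queues = 4 gives want = 5: 4 queue vectors + 1 link */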
2492         if (msgs >= want)
2493                 msgs = want;
2494         else {
2495                 device_printf(adapter->dev,
2496                     "MSIX Configuration Problem, "
2497                     "%d vectors configured, but %d queues wanted!\n",
2498                     msgs, want);
2499                 return (ENXIO);
2500         }
2501         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2502                 device_printf(adapter->dev,
2503                     "Using MSIX interrupts with %d vectors\n", msgs);
2504                 adapter->num_queues = igb_num_queues;
2505                 return (msgs);
2506         }
2507 msi:
2508         msgs = pci_msi_count(dev);
2509         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2510                 device_printf(adapter->dev,"Using MSI interrupt\n");
2511         return (msgs);
2512 }
2513
2514 /*********************************************************************
2515  *
2516  *  Set up a fresh starting state
2517  *
2518  **********************************************************************/
2519 static void
2520 igb_reset(struct adapter *adapter)
2521 {
2522         device_t        dev = adapter->dev;
2523         struct e1000_hw *hw = &adapter->hw;
2524         struct e1000_fc_info *fc = &hw->fc;
2525         struct ifnet    *ifp = adapter->ifp;
2526         u32             pba = 0;
2527         u16             hwm;
2528
2529         INIT_DEBUGOUT("igb_reset: begin");
2530
2531         /* Let the firmware know the OS is in control */
2532         igb_get_hw_control(adapter);
2533
2534         /*
2535          * Packet Buffer Allocation (PBA)
2536          * Writing PBA sets the receive portion of the buffer;
2537          * the remainder is used for the transmit buffer.
2538          */
2539         switch (hw->mac.type) {
2540         case e1000_82575:
2541                 pba = E1000_PBA_32K;
2542                 break;
2543         case e1000_82576:
2544                 pba = E1000_PBA_64K;
2545                 break;
2546         case e1000_82580:
2547                 pba = E1000_PBA_35K;
2548         default:
2549                 break;
2550         }
2551
2552         /* Special needs in case of Jumbo frames */
2553         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2554                 u32 tx_space, min_tx, min_rx;
2555                 pba = E1000_READ_REG(hw, E1000_PBA);
2556                 tx_space = pba >> 16;
2557                 pba &= 0xffff;
2558                 min_tx = (adapter->max_frame_size +
2559                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2560                 min_tx = roundup2(min_tx, 1024);
2561                 min_tx >>= 10;
2562                 min_rx = adapter->max_frame_size;
2563                 min_rx = roundup2(min_rx, 1024);
2564                 min_rx >>= 10;
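                /*
                 * Illustrative (assuming a 16-byte TX descriptor and
                 * 4-byte FCS): for a 9018-byte max frame,
                 * min_tx = (9018 + 16 - 4) * 2 = 18060, rounded up to
                 * 18432 and shifted to 18 (KB); min_rx = 9018 rounds
                 * up to 9216, i.e. 9 (KB).
                 */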
2565                 if (tx_space < min_tx &&
2566                     ((min_tx - tx_space) < pba)) {
2567                         pba = pba - (min_tx - tx_space);
2568                         /*
2569                          * if short on rx space, rx wins
2570                          * and must trump tx adjustment
2571                          */
2572                         if (pba < min_rx)
2573                                 pba = min_rx;
2574                 }
2575                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2576         }
2577
2578         INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2579
2580         /*
2581          * These parameters control the automatic generation (Tx) and
2582          * response (Rx) to Ethernet PAUSE frames.
2583          * - High water mark should allow for at least two frames to be
2584          *   received after sending an XOFF.
2585          * - Low water mark works best when it is very near the high water mark.
2586          *   This allows the receiver to restart by sending XON when it has
2587          *   drained a bit.
2588          */
2589         hwm = min(((pba << 10) * 9 / 10),
2590             ((pba << 10) - 2 * adapter->max_frame_size));
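        /*
         * Worked example (illustrative): with a 64KB RX PBA and a
         * 1522-byte max frame, hwm = min(65536 * 9 / 10,
         * 65536 - 2 * 1522) = min(58982, 62492) = 58982; the masking
         * below then rounds it down to the hardware's granularity.
         */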
2591
2592         if (hw->mac.type < e1000_82576) {
2593                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2594                 fc->low_water = fc->high_water - 8;
2595         } else {
2596                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2597                 fc->low_water = fc->high_water - 16;
2598         }
2599
2600         fc->pause_time = IGB_FC_PAUSE_TIME;
2601         fc->send_xon = TRUE;
2602
2603         /* Set Flow control, use the tunable location if sane */
2604         if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2605                 fc->requested_mode = igb_fc_setting;
2606         else
2607                 fc->requested_mode = e1000_fc_none;
2608
2609         fc->current_mode = fc->requested_mode;
2610
2611         /* Issue a global reset */
2612         e1000_reset_hw(hw);
2613         E1000_WRITE_REG(hw, E1000_WUC, 0);
2614
2615         if (e1000_init_hw(hw) < 0)
2616                 device_printf(dev, "Hardware Initialization Failed\n");
2617
2618         if (hw->mac.type == e1000_82580) {
2619                 u32 reg;
2620
2621                 hwm = (pba << 10) - (2 * adapter->max_frame_size);
2622                 /*
2623                  * 0x80000000 - enable DMA COAL
2624                  * 0x10000000 - use L0s as low power
2625                  * 0x20000000 - use L1 as low power
2626                  * X << 16 - exit dma coal when rx data exceeds X kB
2627                  * Y - upper limit to stay in dma coal in units of 32usecs
2628                  */
2629                 E1000_WRITE_REG(hw, E1000_DMACR,
2630                     0xA0000006 | ((hwm << 6) & 0x00FF0000));
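                /*
                 * Decoding the write above against the bit list in the
                 * comment: 0xA0000006 = enable DMA coalescing
                 * (0x80000000) | use L1 as low power (0x20000000) |
                 * an upper limit of 6 * 32 usecs; the (hwm << 6) term
                 * is the exit threshold in kB ((hwm >> 10) << 16)
                 * packed into bits 16-23.
                 */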
2631
2632                 /* set hwm to PBA - 2 * max frame size */
2633                 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2634                 /*
2635                  * This sets the time to wait before requesting transition to
2636                  * low power state to number of usecs needed to receive 1 512
2637                  * byte frame at gigabit line rate
2638                  */
2639                 E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2640
2641                 /* free space in tx packet buffer to wake from DMA coal */
2642                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2643                     (20480 - (2 * adapter->max_frame_size)) >> 6);
2644
2645                 /* make low power state decision controlled by DMA coal */
2646                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2647                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2648                     reg | E1000_PCIEMISC_LX_DECISION);
2649         }
2650
2651         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2652         e1000_get_phy_info(hw);
2653         e1000_check_for_link(hw);
2654         return;
2655 }
2656
2657 /*********************************************************************
2658  *
2659  *  Setup networking device structure and register an interface.
2660  *
2661  **********************************************************************/
2662 static void
2663 igb_setup_interface(device_t dev, struct adapter *adapter)
2664 {
2665         struct ifnet   *ifp;
2666
2667         INIT_DEBUGOUT("igb_setup_interface: begin");
2668
2669         ifp = adapter->ifp = &adapter->arpcom.ac_if;
2670         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2671         ifp->if_mtu = ETHERMTU;
2672         ifp->if_init =  igb_init;
2673         ifp->if_softc = adapter;
2674         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2675         ifp->if_ioctl = igb_ioctl;
2676         ifp->if_start = igb_start;
2677 #if __FreeBSD_version >= 800000
2678         ifp->if_transmit = igb_mq_start;
2679         ifp->if_qflush = igb_qflush;
2680 #endif
2681         ifq_set_maxlen(&ifp->if_snd, adapter->num_tx_desc - 1);
2682         ifq_set_ready(&ifp->if_snd);
2683
2684         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2685
2686         ifp->if_capabilities = ifp->if_capenable = 0;
2687
2688         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2689 #ifdef NET_TSO
2690         ifp->if_capabilities |= IFCAP_TSO4;
2691 #endif
2692         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2693 #ifdef NET_LRO
2694         if (igb_header_split)
2695                 ifp->if_capabilities |= IFCAP_LRO;
2696 #endif
2697
2698         ifp->if_capenable = ifp->if_capabilities;
2699 #ifdef DEVICE_POLLING
2700         ifp->if_capabilities |= IFCAP_POLLING;
2701 #endif
2702
2703         /*
2704          * Tell the upper layer(s) we support long frames.
2705          */
2706         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2707         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2708         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2709
2710         /*
2711          * Specify the media types supported by this adapter and register
2712          * callbacks to update media and link information
2713          */
2714         ifmedia_init(&adapter->media, IFM_IMASK,
2715             igb_media_change, igb_media_status);
2716         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2717             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2718                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2719                             0, NULL);
2720                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2721         } else {
2722                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2723                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2724                             0, NULL);
2725                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2726                             0, NULL);
2727                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2728                             0, NULL);
2729                 if (adapter->hw.phy.type != e1000_phy_ife) {
2730                         ifmedia_add(&adapter->media,
2731                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2732                         ifmedia_add(&adapter->media,
2733                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2734                 }
2735         }
2736         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2737         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2738 }
2739
2740
2741 /*
2742  * Manage DMA'able memory.
2743  */
2744 static void
2745 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2746 {
2747         if (error)
2748                 return;
2749         *(bus_addr_t *) arg = segs[0].ds_addr;
2750 }
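/*
 * Note: bus_dmamap_load() in igb_dma_malloc() below invokes the
 * callback above with the resolved segment list; because the tag is
 * created with nsegments = 1, segs[0].ds_addr is the physical base
 * of the whole allocation.
 */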
2751
2752 static int
2753 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2754         struct igb_dma_alloc *dma, int mapflags)
2755 {
2756         int error;
2757
2758         error = bus_dma_tag_create(NULL,                /* parent */
2759                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2760                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2761                                 BUS_SPACE_MAXADDR,      /* highaddr */
2762                                 NULL, NULL,             /* filter, filterarg */
2763                                 size,                   /* maxsize */
2764                                 1,                      /* nsegments */
2765                                 size,                   /* maxsegsize */
2766                                 0,                      /* flags */
2767                                 &dma->dma_tag);
2768         if (error) {
2769                 device_printf(adapter->dev,
2770                     "%s: bus_dma_tag_create failed: %d\n",
2771                     __func__, error);
2772                 goto fail_0;
2773         }
2774
2775         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2776             BUS_DMA_NOWAIT, &dma->dma_map);
2777         if (error) {
2778                 device_printf(adapter->dev,
2779                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2780                     __func__, (uintmax_t)size, error);
2781                 goto fail_2;
2782         }
2783
2784         dma->dma_paddr = 0;
2785         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2786             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2787         if (error || dma->dma_paddr == 0) {
2788                 device_printf(adapter->dev,
2789                     "%s: bus_dmamap_load failed: %d\n",
2790                     __func__, error);
2791                 goto fail_3;
2792         }
2793
2794         return (0);
2795
2796 fail_3:
2797         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2798 fail_2:
2799         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2800         bus_dma_tag_destroy(dma->dma_tag);
2801 fail_0:
2802         dma->dma_map = NULL;
2803         dma->dma_tag = NULL;
2804
2805         return (error);
2806 }
2807
2808 static void
2809 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2810 {
2811         if (dma->dma_tag == NULL)
2812                 return;
2813         if (dma->dma_map != NULL) {
2814                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2815                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2816                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2817                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2818                 dma->dma_map = NULL;
2819         }
2820         bus_dma_tag_destroy(dma->dma_tag);
2821         dma->dma_tag = NULL;
2822 }
2823
2824
2825 /*********************************************************************
2826  *
2827  *  Allocate memory for the transmit and receive rings, and then
2828  *  the descriptors associated with each, called only once at attach.
2829  *
2830  **********************************************************************/
2831 static int
2832 igb_allocate_queues(struct adapter *adapter)
2833 {
2834         device_t dev = adapter->dev;
2835         struct igb_queue        *que = NULL;
2836         struct tx_ring          *txr = NULL;
2837         struct rx_ring          *rxr = NULL;
2838         int rsize, tsize, error = E1000_SUCCESS;
2839         int txconf = 0, rxconf = 0;
2840
2841         /* First allocate the top level queue structs */
2842         if (!(adapter->queues =
2843             (struct igb_queue *) kmalloc(sizeof(struct igb_queue) *
2844             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2845                 device_printf(dev, "Unable to allocate queue memory\n");
2846                 error = ENOMEM;
2847                 goto fail;
2848         }
2849
2850         /* Next allocate the TX ring struct memory */
2851         if (!(adapter->tx_rings =
2852             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2853             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2854                 device_printf(dev, "Unable to allocate TX ring memory\n");
2855                 error = ENOMEM;
2856                 goto tx_fail;
2857         }
2858
2859         /* Now allocate the RX */
2860         if (!(adapter->rx_rings =
2861             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2862             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2863                 device_printf(dev, "Unable to allocate RX ring memory\n");
2864                 error = ENOMEM;
2865                 goto rx_fail;
2866         }
2867
2868         tsize = roundup2(adapter->num_tx_desc *
2869             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2870         /*
2871          * Now set up the TX queues, txconf is needed to handle the
2872          * possibility that things fail midcourse and we need to
2873          * undo memory gracefully
2874          */ 
2875         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2876                 /* Set up some basics */
2877                 txr = &adapter->tx_rings[i];
2878                 txr->adapter = adapter;
2879                 txr->me = i;
2880
2881                 /* Initialize the TX lock */
2882                 ksnprintf(txr->spin_name, sizeof(txr->spin_name), "%s:tx(%d)",
2883                     device_get_nameunit(dev), txr->me);
2884
2885                 spin_init(&txr->tx_spin);
2886
2887                 if (igb_dma_malloc(adapter, tsize,
2888                         &txr->txdma, BUS_DMA_NOWAIT)) {
2889                         device_printf(dev,
2890                             "Unable to allocate TX Descriptor memory\n");
2891                         error = ENOMEM;
2892                         goto err_tx_desc;
2893                 }
2894                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2895                 bzero((void *)txr->tx_base, tsize);
2896
2897                 /* Now allocate transmit buffers for the ring */
2898                 if (igb_allocate_transmit_buffers(txr)) {
2899                         device_printf(dev,
2900                             "Critical Failure setting up transmit buffers\n");
2901                         error = ENOMEM;
2902                         goto err_tx_desc;
2903                 }
2904 #if __FreeBSD_version >= 800000
2905                 /* Allocate a buf ring */
2906                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2907                     M_WAITOK, &txr->tx_mtx);
2908 #endif
2909         }
2910
2911         /*
2912          * Next the RX queues...
2913          */ 
2914         rsize = roundup2(adapter->num_rx_desc *
2915             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2916         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2917                 rxr = &adapter->rx_rings[i];
2918                 rxr->adapter = adapter;
2919                 rxr->me = i;
2920
2921                 /* Initialize the RX lock */
2922                 ksnprintf(rxr->spin_name, sizeof(rxr->spin_name), "%s:rx(%d)",
2923                     device_get_nameunit(dev), rxr->me);
2924
2925                 spin_init(&rxr->rx_spin);
2926
2927                 if (igb_dma_malloc(adapter, rsize,
2928                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2929                         device_printf(dev,
2930                             "Unable to allocate RX Descriptor memory\n");
2931                         error = ENOMEM;
2932                         goto err_rx_desc;
2933                 }
2934                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2935                 bzero((void *)rxr->rx_base, rsize);
2936
2937                 /* Allocate receive buffers for the ring */
2938                 if (igb_allocate_receive_buffers(rxr)) {
2939                         device_printf(dev,
2940                             "Critical Failure setting up receive buffers\n");
2941                         error = ENOMEM;
2942                         goto err_rx_desc;
2943                 }
2944         }
2945
2946         /*
2947         ** Finally set up the queue holding structs
2948         */
2949         for (int i = 0; i < adapter->num_queues; i++) {
2950                 que = &adapter->queues[i];
2951                 que->adapter = adapter;
2952                 que->txr = &adapter->tx_rings[i];
2953                 que->rxr = &adapter->rx_rings[i];
2954         }
2955
2956         return (0);
2957
2958 err_rx_desc:
2959         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2960                 igb_dma_free(adapter, &rxr->rxdma);
2961 err_tx_desc:
2962         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2963                 igb_dma_free(adapter, &txr->txdma);
2964         kfree(adapter->rx_rings, M_DEVBUF);
2965 rx_fail:
2966 #if __FreeBSD_version >= 800000
2967         buf_ring_free(txr->br, M_DEVBUF);
2968 #endif
2969         kfree(adapter->tx_rings, M_DEVBUF);
2970 tx_fail:
2971         kfree(adapter->queues, M_DEVBUF);
2972 fail:
2973         return (error);
2974 }
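
/*
 * The tsize/rsize computations above pad each descriptor ring to an
 * IGB_DBA_ALIGN boundary with roundup2().  A hedged sketch of the
 * arithmetic, assuming 16-byte advanced descriptors and 128-byte
 * alignment (the macro mirrors the roundup2 in <sys/param.h>; kept
 * out of the build):
 */
#if 0
#define SKETCH_ROUNDUP2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))
/* e.g. 100 descriptors: 100 * 16 = 1600 bytes -> padded to 1664 */
static int sketch_tsize = SKETCH_ROUNDUP2(100 * 16, 128);
#endif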
2975
2976 /*********************************************************************
2977  *
2978  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2979  *  the information needed to transmit a packet on the wire. This is
2980  *  called only once at attach, setup is done every reset.
2981  *
2982  **********************************************************************/
2983 static int
2984 igb_allocate_transmit_buffers(struct tx_ring *txr)
2985 {
2986         struct adapter *adapter = txr->adapter;
2987         device_t dev = adapter->dev;
2988         struct igb_tx_buffer *txbuf;
2989         int error, i;
2990
2991         /*
2992          * Setup DMA descriptor areas.
2993          */
2994         if ((error = bus_dma_tag_create(NULL,
2995                                1, 0,                    /* alignment, bounds */
2996                                BUS_SPACE_MAXADDR,       /* lowaddr */
2997                                BUS_SPACE_MAXADDR,       /* highaddr */
2998                                NULL, NULL,              /* filter, filterarg */
2999                                IGB_TSO_SIZE,            /* maxsize */
3000                                IGB_MAX_SCATTER,         /* nsegments */
3001                                PAGE_SIZE,               /* maxsegsize */
3002                                0,                       /* flags */
3003                                &txr->txtag))) {
3004                 device_printf(dev,"Unable to allocate TX DMA tag\n");
3005                 goto fail;
3006         }
3007
3008         if (!(txr->tx_buffers =
3009             (struct igb_tx_buffer *) kmalloc(sizeof(struct igb_tx_buffer) *
3010             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3011                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3012                 error = ENOMEM;
3013                 goto fail;
3014         }
3015
3016         /* Create the descriptor buffer dma maps */
3017         txbuf = txr->tx_buffers;
3018         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3019                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3020                 if (error != 0) {
3021                         device_printf(dev, "Unable to create TX DMA map\n");
3022                         goto fail;
3023                 }
3024         }
3025
3026         return 0;
3027 fail:
3028         /* Free everything; this handles a partially built ring too */
3029         igb_free_transmit_structures(adapter);
3030         return (error);
3031 }
3032
3033 /*********************************************************************
3034  *
3035  *  Initialize a transmit ring.
3036  *
3037  **********************************************************************/
3038 static void
3039 igb_setup_transmit_ring(struct tx_ring *txr)
3040 {
3041         struct adapter *adapter = txr->adapter;
3042         struct igb_tx_buffer *txbuf;
3043         int i;
3044
3045         /* Clear the old descriptor contents */
3046         IGB_TX_LOCK(txr);
3047         bzero((void *)txr->tx_base,
3048               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3049         /* Reset indices */
3050         txr->next_avail_desc = 0;
3051         txr->next_to_clean = 0;
3052
3053         /* Free any existing tx buffers. */
3054         txbuf = txr->tx_buffers;
3055         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3056                 if (txbuf->m_head != NULL) {
3057                         bus_dmamap_sync(txr->txtag, txbuf->map,
3058                             BUS_DMASYNC_POSTWRITE);
3059                         bus_dmamap_unload(txr->txtag, txbuf->map);
3060                         m_freem(txbuf->m_head);
3061                         txbuf->m_head = NULL;
3062                 }
3063                 /* clear the watch index */
3064                 txbuf->next_eop = -1;
3065         }
3066
3067         /* Set number of descriptors available */
3068         txr->tx_avail = adapter->num_tx_desc;
3069
3070         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3071             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3072         IGB_TX_UNLOCK(txr);
3073 }
3074
3075 /*********************************************************************
3076  *
3077  *  Initialize all transmit rings.
3078  *
3079  **********************************************************************/
3080 static void
3081 igb_setup_transmit_structures(struct adapter *adapter)
3082 {
3083         struct tx_ring *txr = adapter->tx_rings;
3084
3085         for (int i = 0; i < adapter->num_queues; i++, txr++)
3086                 igb_setup_transmit_ring(txr);
3087
3088         return;
3089 }
3090
3091 /*********************************************************************
3092  *
3093  *  Enable transmit unit.
3094  *
3095  **********************************************************************/
3096 static void
3097 igb_initialize_transmit_units(struct adapter *adapter)
3098 {
3099         struct tx_ring  *txr = adapter->tx_rings;
3100         struct e1000_hw *hw = &adapter->hw;
3101         u32             tctl, txdctl;
3102
3103         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3104
3105         /* Setup the Tx Descriptor Rings */
3106         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3107                 u64 bus_addr = txr->txdma.dma_paddr;
3108
3109                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3110                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3111                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3112                     (uint32_t)(bus_addr >> 32));
3113                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3114                     (uint32_t)bus_addr);
3115
3116                 /* Setup the HW Tx Head and Tail descriptor pointers */
3117                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3118                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3119
3120                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3121                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3122                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3123
3124                 txr->watchdog_check = FALSE;
3125
3126                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3127                 txdctl |= IGB_TX_PTHRESH;
3128                 txdctl |= IGB_TX_HTHRESH << 8;
3129                 txdctl |= IGB_TX_WTHRESH << 16;
3130                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3131                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3132         }
3133
3134         /* Program the Transmit Control Register */
3135         tctl = E1000_READ_REG(hw, E1000_TCTL);
3136         tctl &= ~E1000_TCTL_CT;
3137         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3138                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3139
3140         e1000_config_collision_dist(hw);
3141
3142         /* This write will effectively turn on the transmit unit. */
3143         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3144 }
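
/*
 * TXDCTL above packs three thresholds around the queue-enable bit:
 * PTHRESH in bits 0-5, HTHRESH in bits 8-13, WTHRESH in bits 16-21,
 * with E1000_TXDCTL_QUEUE_ENABLE at bit 25.  A hedged sketch of
 * building the register value from scratch (illustrative only, kept
 * out of the build):
 */
#if 0
static u32
txdctl_sketch(u32 pthresh, u32 hthresh, u32 wthresh)
{
	return ((1U << 25) |			/* queue enable */
	    (pthresh & 0x3f) |			/* prefetch threshold */
	    ((hthresh & 0x3f) << 8) |		/* host threshold */
	    ((wthresh & 0x3f) << 16));		/* write-back threshold */
}
#endif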
3145
3146 /*********************************************************************
3147  *
3148  *  Free all transmit rings.
3149  *
3150  **********************************************************************/
3151 static void
3152 igb_free_transmit_structures(struct adapter *adapter)
3153 {
3154         struct tx_ring *txr = adapter->tx_rings;
3155
3156         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3157                 IGB_TX_LOCK(txr);
3158                 igb_free_transmit_buffers(txr);
3159                 igb_dma_free(adapter, &txr->txdma);
3160                 IGB_TX_UNLOCK(txr);
3161                 IGB_TX_LOCK_DESTROY(txr);
3162         }
3163         kfree(adapter->tx_rings, M_DEVBUF);
3164 }
3165
3166 /*********************************************************************
3167  *
3168  *  Free transmit ring related data structures.
3169  *
3170  **********************************************************************/
3171 static void
3172 igb_free_transmit_buffers(struct tx_ring *txr)
3173 {
3174         struct adapter *adapter = txr->adapter;
3175         struct igb_tx_buffer *tx_buffer;
3176         int             i;
3177
3178         INIT_DEBUGOUT("free_transmit_ring: begin");
3179
3180         if (txr->tx_buffers == NULL)
3181                 return;
3182
3183         tx_buffer = txr->tx_buffers;
3184         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3185                 if (tx_buffer->m_head != NULL) {
3186                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3187                             BUS_DMASYNC_POSTWRITE);
3188                         bus_dmamap_unload(txr->txtag,
3189                             tx_buffer->map);
3190                         m_freem(tx_buffer->m_head);
3191                         tx_buffer->m_head = NULL;
3192                         if (tx_buffer->map != NULL) {
3193                                 bus_dmamap_destroy(txr->txtag,
3194                                     tx_buffer->map);
3195                                 tx_buffer->map = NULL;
3196                         }
3197                 } else if (tx_buffer->map != NULL) {
3198                         bus_dmamap_unload(txr->txtag,
3199                             tx_buffer->map);
3200                         bus_dmamap_destroy(txr->txtag,
3201                             tx_buffer->map);
3202                         tx_buffer->map = NULL;
3203                 }
3204         }
3205 #if __FreeBSD_version >= 800000
3206         if (txr->br != NULL)
3207                 buf_ring_free(txr->br, M_DEVBUF);
3208 #endif
3209         if (txr->tx_buffers != NULL) {
3210                 kfree(txr->tx_buffers, M_DEVBUF);
3211                 txr->tx_buffers = NULL;
3212         }
3213         if (txr->txtag != NULL) {
3214                 bus_dma_tag_destroy(txr->txtag);
3215                 txr->txtag = NULL;
3216         }
3217         return;
3218 }
3219
3220 /**********************************************************************
3221  *
3222  *  Setup work for hardware segmentation offload (TSO)
3223  *
3224  **********************************************************************/
3225 #ifdef NET_TSO 
3226 static boolean_t
3227 igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3228 {
3229         struct adapter *adapter = txr->adapter;
3230         struct e1000_adv_tx_context_desc *TXD;
3231         struct igb_tx_buffer        *tx_buffer;
3232         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3233         u32 mss_l4len_idx = 0;
3234         u16 vtag = 0;
3235         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3236         struct ether_vlan_header *eh;
3237         struct ip *ip;
3238         struct tcphdr *th;
3239
3240
3241         /*
3242          * Determine where frame payload starts.
3243          * Jump over vlan headers if already present
3244          */
3245         eh = mtod(mp, struct ether_vlan_header *);
3246         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3247                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3248         else
3249                 ehdrlen = ETHER_HDR_LEN;
3250
3251         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3252         if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3253                 return FALSE;
3254
3255         /* Only supports IPV4 for now */
3256         ctxd = txr->next_avail_desc;
3257         tx_buffer = &txr->tx_buffers[ctxd];
3258         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3259
3260         ip = (struct ip *)(mp->m_data + ehdrlen);
3261         if (ip->ip_p != IPPROTO_TCP)
3262                 return FALSE;
3263         ip->ip_sum = 0;
3264         ip_hlen = ip->ip_hl << 2;
3265         th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3266         th->th_sum = in_pseudo(ip->ip_src.s_addr,
3267             ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3268         tcp_hlen = th->th_off << 2;
3269         /*
3270          * Calculate header length, this is used
3271          * in the transmit desc in igb_xmit
3272          */
3273         *hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3274
3275         /* VLAN MACLEN IPLEN */
3276         if (mp->m_flags & M_VLANTAG) {
3277                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3278                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3279         }
3280
3281         vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3282         vlan_macip_lens |= ip_hlen;
3283         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3284
3285         /* ADV DTYPE TUCMD */
3286         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3287         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3288         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3289         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3290
3291         /* MSS L4LEN IDX */
3292         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3293         mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3294         /* 82575 needs the queue index added */
3295         if (adapter->hw.mac.type == e1000_82575)
3296                 mss_l4len_idx |= txr->me << 4;
3297         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3298
3299         TXD->seqnum_seed = htole32(0);
3300         tx_buffer->m_head = NULL;
3301         tx_buffer->next_eop = -1;
3302
3303         if (++ctxd == adapter->num_tx_desc)
3304                 ctxd = 0;
3305
3306         txr->tx_avail--;
3307         txr->next_avail_desc = ctxd;
3308         return TRUE;
3309 }
3310 #endif
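
/*
 * A worked example of the hdrlen arithmetic above: ip_hl and th_off
 * are 4-bit word counts, so both are scaled by 4 (<< 2).  Assuming an
 * untagged frame with minimal 20-byte IP and TCP headers:
 *
 *	ehdrlen  = ETHER_HDR_LEN        = 14
 *	ip_hlen  = ip->ip_hl << 2  = 5 << 2 = 20
 *	tcp_hlen = th->th_off << 2 = 5 << 2 = 20
 *	hdrlen   = 14 + 20 + 20         = 54
 */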
3311
3312 /*********************************************************************
3313  *
3314  *  Context Descriptor setup for VLAN or CSUM
3315  *
3316  **********************************************************************/
3317
3318 static bool
3319 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3320 {
3321         struct adapter *adapter = txr->adapter;
3322         struct e1000_adv_tx_context_desc *TXD;
3323         struct igb_tx_buffer        *tx_buffer;
3324         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3325         struct ether_vlan_header *eh;
3326         struct ip *ip = NULL;
3327         struct ip6_hdr *ip6;
3328         int  ehdrlen, ctxd, ip_hlen = 0;
3329         u16     etype, vtag = 0;
3330         u8      ipproto = 0;
3331         bool    offload = TRUE;
3332
3333         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3334                 offload = FALSE;
3335
3336         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3337         ctxd = txr->next_avail_desc;
3338         tx_buffer = &txr->tx_buffers[ctxd];
3339         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3340
3341         /*
3342         ** In advanced descriptors the vlan tag must be
3343         ** placed into the context descriptor, so even with
3344         ** no checksum offload we may be here just for that.
3345         */
3346         if (mp->m_flags & M_VLANTAG) {
3347                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3348                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3349         } else if (offload == FALSE)
3350                 return FALSE;
3351
3352         /*
3353          * Determine where frame payload starts.
3354          * Jump over vlan headers if already present,
3355          * helpful for QinQ too.
3356          */
3357         eh = mtod(mp, struct ether_vlan_header *);
3358         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3359                 etype = ntohs(eh->evl_proto);
3360                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3361         } else {
3362                 etype = ntohs(eh->evl_encap_proto);
3363                 ehdrlen = ETHER_HDR_LEN;
3364         }
3365
3366         /* Set the ether header length */
3367         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3368
3369         switch (etype) {
3370                 case ETHERTYPE_IP:
3371                         ip = (struct ip *)(mp->m_data + ehdrlen);
3372                         ip_hlen = ip->ip_hl << 2;
3373                         if (mp->m_len < ehdrlen + ip_hlen) {
3374                                 offload = FALSE;
3375                                 break;
3376                         }
3377                         ipproto = ip->ip_p;
3378                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3379                         break;
3380                 case ETHERTYPE_IPV6:
3381                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3382                         ip_hlen = sizeof(struct ip6_hdr);
3383                         if (mp->m_len < ehdrlen + ip_hlen)
3384                                 return (FALSE);
3385                         ipproto = ip6->ip6_nxt;
3386                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3387                         break;
3388                 default:
3389                         offload = FALSE;
3390                         break;
3391         }
3392
3393         vlan_macip_lens |= ip_hlen;
3394         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3395
3396         switch (ipproto) {
3397                 case IPPROTO_TCP:
3398                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3399                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3400                         break;
3401                 case IPPROTO_UDP:
3402                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3403                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3404                         break;
3405 #if __FreeBSD_version >= 800000
3406                 case IPPROTO_SCTP:
3407                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3408                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3409                         break;
3410 #endif
3411                 default:
3412                         offload = FALSE;
3413                         break;
3414         }
3415
3416         /* 82575 needs the queue index added */
3417         if (adapter->hw.mac.type == e1000_82575)
3418                 mss_l4len_idx = txr->me << 4;
3419
3420         /* Now copy bits into descriptor */
3421         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3422         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3423         TXD->seqnum_seed = htole32(0);
3424         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3425
3426         tx_buffer->m_head = NULL;
3427         tx_buffer->next_eop = -1;
3428
3429         /* We've consumed the first desc, adjust counters */
3430         if (++ctxd == adapter->num_tx_desc)
3431                 ctxd = 0;
3432         txr->next_avail_desc = ctxd;
3433         --txr->tx_avail;
3434
3435         return (offload);
3436 }
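
/*
 * vlan_macip_lens above packs three fields into one 32-bit word: the
 * IP header length in the low bits, the MAC header length at
 * E1000_ADVTXD_MACLEN_SHIFT and the VLAN tag at
 * E1000_ADVTXD_VLAN_SHIFT.  A hedged sketch, assuming the usual shift
 * values of 9 and 16 from the e1000 headers (kept out of the build):
 */
#if 0
static u32
ctx_lens_sketch(u16 vtag, int ehdrlen, int ip_hlen)
{
	return (((u32)vtag << 16) |	/* VLAN tag */
	    ((u32)ehdrlen << 9) |	/* MAC header length */
	    (u32)ip_hlen);		/* IP header length */
}
#endif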
3437
3438
3439 /**********************************************************************
3440  *
3441  *  Examine each tx_buffer in the used queue. If the hardware is done
3442  *  processing the packet then free associated resources. The
3443  *  tx_buffer is put back on the free queue.
3444  *
3445  *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3446  **********************************************************************/
3447 static bool
3448 igb_txeof(struct tx_ring *txr)
3449 {
3450         struct adapter  *adapter = txr->adapter;
3451         int first, last, done;
3452         struct igb_tx_buffer *tx_buffer;
3453         struct e1000_tx_desc   *tx_desc, *eop_desc;
3454         struct ifnet   *ifp = adapter->ifp;
3455
3456         IGB_TX_LOCK_ASSERT(txr);
3457
3458         if (txr->tx_avail == adapter->num_tx_desc)
3459                 return FALSE;
3460
3461         first = txr->next_to_clean;
3462         tx_desc = &txr->tx_base[first];
3463         tx_buffer = &txr->tx_buffers[first];
3464         last = tx_buffer->next_eop;
3465         eop_desc = &txr->tx_base[last];
3466
3467         /*
3468          * Get the index of the first descriptor AFTER
3469          * the EOP of the first packet; that way the
3470          * inner while loop can terminate with a
3471          * simple comparison.
3472          */
3473         if (++last == adapter->num_tx_desc)
3474                 last = 0;
3475         done = last;
3476
3477         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3478             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3479
3480         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3481                 /* We clean the range of the packet */
3482                 while (first != done) {
3483                         tx_desc->upper.data = 0;
3484                         tx_desc->lower.data = 0;
3485                         tx_desc->buffer_addr = 0;
3486                         ++txr->tx_avail;
3487
3488                         if (tx_buffer->m_head) {
3489                                 txr->bytes +=
3490                                     tx_buffer->m_head->m_pkthdr.len;
3491                                 bus_dmamap_sync(txr->txtag,
3492                                     tx_buffer->map,
3493                                     BUS_DMASYNC_POSTWRITE);
3494                                 bus_dmamap_unload(txr->txtag,
3495                                     tx_buffer->map);
3496
3497                                 m_freem(tx_buffer->m_head);
3498                                 tx_buffer->m_head = NULL;
3499                         }
3500                         tx_buffer->next_eop = -1;
3501                         txr->watchdog_time = ticks;
3502
3503                         if (++first == adapter->num_tx_desc)
3504                                 first = 0;
3505
3506                         tx_buffer = &txr->tx_buffers[first];
3507                         tx_desc = &txr->tx_base[first];
3508                 }
3509                 ++txr->packets;
3510                 ++ifp->if_opackets;
3511                 /* See if we can continue to the next packet */
3512                 last = tx_buffer->next_eop;
3513                 if (last != -1) {
3514                         eop_desc = &txr->tx_base[last];
3515                         /* Get new done point */
3516                         if (++last == adapter->num_tx_desc) last = 0;
3517                         done = last;
3518                 } else
3519                         break;
3520         }
3521         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3522             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3523
3524         txr->next_to_clean = first;
3525
3526         /*
3527          * If we have enough room, clear IFF_OACTIVE
3528          * to tell the stack that it is OK to send packets.
3529          */
3530         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {                
3531                 ifp->if_flags &= ~IFF_OACTIVE;
3532                 /* All clean, turn off the watchdog */
3533                 if (txr->tx_avail == adapter->num_tx_desc) {
3534                         txr->watchdog_check = FALSE;
3535                         return FALSE;
3536                 }
3537         }
3538
3539         return (TRUE);
3540 }
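
/*
 * The cleaning loop above relies on "done" being the slot just past
 * the EOP descriptor, so the inner loop only needs an inequality to
 * terminate across the ring wrap.  A minimal sketch of that modular
 * walk (illustrative only, kept out of the build):
 */
#if 0
static int
ring_walk_sketch(int first, int eop, int ring_size)
{
	int done = (eop + 1) % ring_size;	/* slot after EOP */
	int freed = 0;

	while (first != done) {			/* wraps through 0 */
		freed++;			/* reclaim slot "first" */
		first = (first + 1) % ring_size;
	}
	return (freed);
}
#endif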
3541
3542
3543 /*********************************************************************
3544  *
3545  *  Setup descriptor buffer(s) from system mbuf buffer pools.
3546  *              i - designates the ring index
3547  *              clean - tells the function whether to update
3548  *                      the header, the packet buffer, or both.
3549  *
3550  **********************************************************************/
3551 static int
3552 igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3553 {
3554         struct adapter          *adapter = rxr->adapter;
3555         struct igb_rx_buf       *rxbuf;
3556         struct mbuf             *mh, *mp;
3557         bus_dma_segment_t       hseg[1];
3558         bus_dma_segment_t       pseg[1];
3559         bus_dmamap_t            map;
3560         int                     nsegs, error;
3561
3562
3563         rxbuf = &rxr->rx_buffers[i];
3564         mh = mp = NULL;
3565         if ((clean & IGB_CLEAN_HEADER) != 0) {
3566                 mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3567                 if (mh == NULL) {
3568                         adapter->mbuf_header_failed++;          
3569                         return (ENOBUFS);
3570                 }
3571                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3572                 /*
3573                  * Because IGB_HDR_BUF is smaller than MHLEN and
3574                  * we configure the controller to split headers,
3575                  * we can align the mbuf on an ETHER_ALIGN boundary.
3576                  */
3577                 m_adj(mh, ETHER_ALIGN);
3578                 error = bus_dmamap_load_mbuf_segment(rxr->rx_htag,
3579                     rxr->rx_hspare_map, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3580                 if (error != 0) {
3581                         m_freem(mh);
3582                         return (error);
3583                 }
3584                 mh->m_flags &= ~M_PKTHDR;
3585         }
3586         if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3587                 mp = m_getl(adapter->rx_mbuf_sz,
3588                     MB_DONTWAIT, MT_DATA, M_PKTHDR, NULL);
3589 #if 0
3590                 mp = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR,
3591                     adapter->rx_mbuf_sz);
3592 #endif
3593                 if (mp == NULL) {
3594                         if (mh != NULL) {
3595                                 adapter->mbuf_packet_failed++;          
3596                                 bus_dmamap_unload(rxr->rx_htag,
3597                                     rxr->rx_hspare_map);
3598                                 mh->m_flags |= M_PKTHDR;
3599                                 m_freem(mh);
3600                         }
3601                         return (ENOBUFS);
3602                 }
3603                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3604                 error = bus_dmamap_load_mbuf_segment(rxr->rx_ptag,
3605                     rxr->rx_pspare_map, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3606                 if (error != 0) {
3607                         if (mh != NULL) {
3608                                 bus_dmamap_unload(rxr->rx_htag,
3609                                     rxr->rx_hspare_map);
3610                                 mh->m_flags |= M_PKTHDR;
3611                                 m_freem(mh);
3612                         }
3613                         m_freem(mp);
3614                         return (error);
3615                 }
3616                 mp->m_flags &= ~M_PKTHDR;
3617         }
3618
3619         /* Loading new DMA maps complete, unload maps for received buffers. */
3620         if ((clean & IGB_CLEAN_HEADER) != 0 && rxbuf->m_head != NULL) {
3621                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3622                     BUS_DMASYNC_POSTREAD);
3623                 bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3624         }
3625         if ((clean & IGB_CLEAN_PAYLOAD) != 0 && rxbuf->m_pack != NULL) {
3626                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3627                     BUS_DMASYNC_POSTREAD);
3628                 bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3629         }
3630
3631         /* Reflect loaded dmamaps. */
3632         if ((clean & IGB_CLEAN_HEADER) != 0) {
3633                 map = rxbuf->head_map;
3634                 rxbuf->head_map = rxr->rx_hspare_map;
3635                 rxr->rx_hspare_map = map;
3636                 rxbuf->m_head = mh;
3637                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3638                     BUS_DMASYNC_PREREAD);
3639                 rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
3640         }
3641         if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3642                 map = rxbuf->pack_map;
3643                 rxbuf->pack_map = rxr->rx_pspare_map;
3644                 rxr->rx_pspare_map = map;
3645                 rxbuf->m_pack = mp;
3646                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3647                     BUS_DMASYNC_PREREAD);
3648                 rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
3649         }
3650
3651         return (0);
3652 }
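
/*
 * Note the spare-map swap above: a fresh mbuf is loaded into the
 * ring's spare map first, and only after every load has succeeded are
 * the buffer's map and the spare exchanged, so a failure leaves the
 * previously posted buffer intact.  A minimal sketch of the exchange
 * (illustrative only, kept out of the build):
 */
#if 0
static void
spare_swap_sketch(bus_dmamap_t *slot, bus_dmamap_t *spare)
{
	bus_dmamap_t tmp = *slot;

	*slot = *spare;		/* freshly loaded map goes live */
	*spare = tmp;		/* old map becomes the new spare */
}
#endif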
3653
3654 /*********************************************************************
3655  *
3656  *  Allocate memory for rx_buffer structures. Since we use one
3657  *  rx_buffer per received packet, the maximum number of rx_buffer's
3658  *  that we'll need is equal to the number of receive descriptors
3659  *  that we've allocated.
3660  *
3661  **********************************************************************/
3662 static int
3663 igb_allocate_receive_buffers(struct rx_ring *rxr)
3664 {
3665         struct  adapter         *adapter = rxr->adapter;
3666         device_t                dev = adapter->dev;
3667         struct igb_rx_buf       *rxbuf;
3668         int                     i, bsize, error;
3669
3670         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3671         if (!(rxr->rx_buffers =
3672             (struct igb_rx_buf *) kmalloc(bsize,
3673             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3674                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3675                 error = ENOMEM;
3676                 goto fail;
3677         }
3678
3679         if ((error = bus_dma_tag_create(NULL,
3680                                    1, 0,                /* alignment, bounds */
3681                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3682                                    BUS_SPACE_MAXADDR,   /* highaddr */
3683                                    NULL, NULL,          /* filter, filterarg */
3684                                    MSIZE,               /* maxsize */
3685                                    1,                   /* nsegments */
3686                                    MSIZE,               /* maxsegsize */
3687                                    0,                   /* flags */
3688                                    &rxr->rx_htag))) {
3689                 device_printf(dev, "Unable to create RX DMA tag\n");
3690                 goto fail;
3691         }
3692
3693         if ((error = bus_dma_tag_create(NULL,
3694                                    1, 0,                /* alignment, bounds */
3695                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3696                                    BUS_SPACE_MAXADDR,   /* highaddr */
3697                                    NULL, NULL,          /* filter, filterarg */
3698                                    MJUMPAGESIZE,        /* maxsize */
3699                                    1,                   /* nsegments */
3700                                    MJUMPAGESIZE,        /* maxsegsize */
3701                                    0,                   /* flags */
3702                                    &rxr->rx_ptag))) {
3703                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3704                 goto fail;
3705         }
3706
3707         /* Create the spare maps (used by getbuf) */
3708         error = bus_dmamap_create(rxr->rx_htag, BUS_DMA_NOWAIT,
3709              &rxr->rx_hspare_map);
3710         if (error) {
3711                 device_printf(dev,
3712                     "%s: bus_dmamap_create header spare failed: %d\n",
3713                     __func__, error);
3714                 goto fail;
3715         }
3716         error = bus_dmamap_create(rxr->rx_ptag, BUS_DMA_NOWAIT,
3717              &rxr->rx_pspare_map);
3718         if (error) {
3719                 device_printf(dev,
3720                     "%s: bus_dmamap_create packet spare failed: %d\n",
3721                     __func__, error);
3722                 goto fail;
3723         }
3724
3725         for (i = 0; i < adapter->num_rx_desc; i++) {
3726                 rxbuf = &rxr->rx_buffers[i];
3727                 error = bus_dmamap_create(rxr->rx_htag,
3728                     BUS_DMA_NOWAIT, &rxbuf->head_map);
3729                 if (error) {
3730                         device_printf(dev,
3731                             "Unable to create RX head DMA maps\n");
3732                         goto fail;
3733                 }
3734                 error = bus_dmamap_create(rxr->rx_ptag,
3735                     BUS_DMA_NOWAIT, &rxbuf->pack_map);
3736                 if (error) {
3737                         device_printf(dev,
3738                             "Unable to create RX packet DMA maps\n");
3739                         goto fail;
3740                 }
3741         }
3742
3743         return (0);
3744
3745 fail:
3746         /* Frees all, but can handle partial completion */
3747         igb_free_receive_structures(adapter);
3748         return (error);
3749 }
3750
3751
3752 static void
3753 igb_free_receive_ring(struct rx_ring *rxr)
3754 {
3755         struct  adapter         *adapter;
3756         struct igb_rx_buf       *rxbuf;
3757         int i;
3758
3759         adapter = rxr->adapter;
3760         for (i = 0; i < adapter->num_rx_desc; i++) {
3761                 rxbuf = &rxr->rx_buffers[i];
3762                 if (rxbuf->m_head != NULL) {
3763                         bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3764                             BUS_DMASYNC_POSTREAD);
3765                         bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3766                         rxbuf->m_head->m_flags |= M_PKTHDR;
3767                         m_freem(rxbuf->m_head);
3768                 }
3769                 if (rxbuf->m_pack != NULL) {
3770                         bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3771                             BUS_DMASYNC_POSTREAD);
3772                         bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3773                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3774                         m_freem(rxbuf->m_pack);
3775                 }
3776                 rxbuf->m_head = NULL;
3777                 rxbuf->m_pack = NULL;
3778         }
3779 }
3780
3781
3782 /*********************************************************************
3783  *
3784  *  Initialize a receive ring and its buffers.
3785  *
3786  **********************************************************************/
3787 static int
3788 igb_setup_receive_ring(struct rx_ring *rxr)
3789 {
3790         struct  adapter         *adapter;
3791         struct  ifnet           *ifp;
3792         device_t                dev;
3793 #ifdef NET_LRO 
3794         struct lro_ctrl         *lro = &rxr->lro;
3795 #endif
3796         int                     j, rsize, error = 0;
3797
3798         adapter = rxr->adapter;
3799         dev = adapter->dev;
3800         ifp = adapter->ifp;
3801
3802         /* Clear the ring contents */
3803         IGB_RX_LOCK(rxr);
3804         rsize = roundup2(adapter->num_rx_desc *
3805             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3806         bzero((void *)rxr->rx_base, rsize);
3807
3808         /*
3809         ** Free current RX buffer structures and their mbufs
3810         */
3811         igb_free_receive_ring(rxr);
3812
3813         /* Now replenish the ring mbufs */
3814         for (j = 0; j < adapter->num_rx_desc; j++) {
3815                 if ((error = igb_get_buf(rxr, j, IGB_CLEAN_BOTH)) != 0)
3816                         goto fail;
3817         }
3818
3819         /* Setup our descriptor indices */
3820         rxr->next_to_check = 0;
3821         rxr->last_cleaned = 0;
3822         rxr->lro_enabled = FALSE;
3823
3824         if (igb_header_split)
3825                 rxr->hdr_split = TRUE;
3826 #ifdef NET_LRO
3827         else
3828                 ifp->if_capabilities &= ~IFCAP_LRO;
3829 #endif
3830
3831         rxr->fmp = NULL;
3832         rxr->lmp = NULL;
3833         rxr->discard = FALSE;
3834
3835         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3836             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3837
3838         /*
3839         ** Now set up the LRO interface; we also
3840         ** only do header split when LRO is
3841         ** enabled, since header split is often
3842         ** undesirable otherwise.
3843         */
3844 #ifdef NET_LRO
3845         if (ifp->if_capenable & IFCAP_LRO) {
3846                 int err = tcp_lro_init(lro);
3847                 if (err) {
3848                         device_printf(dev, "LRO Initialization failed!\n");
3849                         goto fail;
3850                 }
3851                 INIT_DEBUGOUT("RX LRO Initialized\n");
3852                 rxr->lro_enabled = TRUE;
3853                 lro->ifp = adapter->ifp;
3854         }
3855 #endif
3856
3857         IGB_RX_UNLOCK(rxr);
3858         return (0);
3859
3860 fail:
3861         igb_free_receive_ring(rxr);
3862         IGB_RX_UNLOCK(rxr);
3863         return (error);
3864 }
3865
3866 /*********************************************************************
3867  *
3868  *  Initialize all receive rings.
3869  *
3870  **********************************************************************/
3871 static int
3872 igb_setup_receive_structures(struct adapter *adapter)
3873 {
3874         struct rx_ring *rxr = adapter->rx_rings;
3875         int i, j;
3876
3877         for (i = 0; i < adapter->num_queues; i++, rxr++)
3878                 if (igb_setup_receive_ring(rxr))
3879                         goto fail;
3880
3881         return (0);
3882 fail:
3883         /*
3884          * Free the RX buffers allocated so far.  We only handle
3885          * the rings that completed; the failing ring has cleaned
3886          * up after itself, so 'i' is the index of the failed
3887          * ring and rings 0 .. i-1 are torn down here.
3888          */
3889         rxr = adapter->rx_rings;
3890         for (j = 0; j < i; j++, rxr++) {
3891                 /* each call tears down one whole ring */
3892                 igb_free_receive_ring(rxr);
3893         }
3894
3895         return (ENOBUFS);
3896 }
3897
3898 /*********************************************************************
3899  *
3900  *  Enable receive unit.
3901  *
3902  **********************************************************************/
3903 static void
3904 igb_initialize_receive_units(struct adapter *adapter)
3905 {
3906         struct rx_ring  *rxr = adapter->rx_rings;
3907         struct ifnet    *ifp = adapter->ifp;
3908         struct e1000_hw *hw = &adapter->hw;
3909         u32             rctl, rxcsum, psize, srrctl = 0;
3910
3911         INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3912
3913         /*
3914          * Make sure receives are disabled while setting
3915          * up the descriptor ring
3916          */
3917         rctl = E1000_READ_REG(hw, E1000_RCTL);
3918         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3919
3920         /*
3921         ** Set up for header split
3922         */
3923         if (rxr->hdr_split) {
3924                 /* Use a standard mbuf for the header */
3925                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3926                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3927         } else
3928                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3929
3930         /*
3931         ** Set up for jumbo frames
3932         */
3933         if (ifp->if_mtu > ETHERMTU) {
3934                 rctl |= E1000_RCTL_LPE;
3935                 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3936                 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3937
3938                 /* Set maximum packet len */
3939                 psize = adapter->max_frame_size;
3940                 /* are we on a vlan? */
3941                 if (adapter->ifp->if_vlantrunks != NULL)
3942                         psize += VLAN_TAG_SIZE;
3943                 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3944         } else {
3945                 rctl &= ~E1000_RCTL_LPE;
3946                 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3947                 rctl |= E1000_RCTL_SZ_2048;
3948         }
3949
3950         /* Setup the Base and Length of the Rx Descriptor Rings */
3951         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3952                 u64 bus_addr = rxr->rxdma.dma_paddr;
3953                 u32 rxdctl;
3954
3955                 E1000_WRITE_REG(hw, E1000_RDLEN(i),
3956                     adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3957                 E1000_WRITE_REG(hw, E1000_RDBAH(i),
3958                     (uint32_t)(bus_addr >> 32));
3959                 E1000_WRITE_REG(hw, E1000_RDBAL(i),
3960                     (uint32_t)bus_addr);
3961                 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3962                 /* Enable this Queue */
3963                 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3964                 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3965                 rxdctl &= 0xFFF00000;
3966                 rxdctl |= IGB_RX_PTHRESH;
3967                 rxdctl |= IGB_RX_HTHRESH << 8;
3968                 rxdctl |= IGB_RX_WTHRESH << 16;
3969                 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3970         }
3971
3972         /*
3973         ** Setup for RX MultiQueue
3974         */
3975         rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3976         if (adapter->num_queues > 1) {
3977                 u32 random[10], mrqc, shift = 0;
3978                 union igb_reta {
3979                         u32 dword;
3980                         u8  bytes[4];
3981                 } reta;
3982
3983                 karc4rand(&random, sizeof(random));
3984                 if (adapter->hw.mac.type == e1000_82575)
3985                         shift = 6;
3986                 /* Populate the redirection table round-robin */
3987                 for (int i = 0; i < 128; i++) {
3988                         reta.bytes[i & 3] =
3989                             (i % adapter->num_queues) << shift;
3990                         if ((i & 3) == 3)
3991                                 E1000_WRITE_REG(hw,
3992                                     E1000_RETA(i >> 2), reta.dword);
3993                 }
3994                 /* Now fill in hash table */
3995                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3996                 for (int i = 0; i < 10; i++)
3997                         E1000_WRITE_REG_ARRAY(hw,
3998                             E1000_RSSRK(0), i, random[i]);
3999
4000                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4001                     E1000_MRQC_RSS_FIELD_IPV4_TCP);
4002                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4003                     E1000_MRQC_RSS_FIELD_IPV6_TCP);
4004                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4005                     E1000_MRQC_RSS_FIELD_IPV6_UDP);
4006                 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4007                     E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4008
4009                 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4010
4011                 /*
4012                 ** NOTE: Receive Full-Packet Checksum Offload 
4013                 ** is mutually exclusive with Multiqueue. However
4014                 ** this is not the same as TCP/IP checksums which
4015                 ** still work.
4016                 */
4017                 rxcsum |= E1000_RXCSUM_PCSD;
4018 #if __FreeBSD_version >= 800000
4019                 /* For SCTP Offload */
4020                 if ((hw->mac.type == e1000_82576)
4021                     && (ifp->if_capenable & IFCAP_RXCSUM))
4022                         rxcsum |= E1000_RXCSUM_CRCOFL;
4023 #endif
4024         } else {
4025                 /* Non RSS setup */
4026                 if (ifp->if_capenable & IFCAP_RXCSUM) {
4027                         rxcsum |= E1000_RXCSUM_IPPCSE;
4028 #if __FreeBSD_version >= 800000
4029                         if (adapter->hw.mac.type == e1000_82576)
4030                                 rxcsum |= E1000_RXCSUM_CRCOFL;
4031 #endif
4032                 } else
4033                         rxcsum &= ~E1000_RXCSUM_TUOFL;
4034         }
4035         E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4036
4037         /* Setup the Receive Control Register */
4038         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4039         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4040                    E1000_RCTL_RDMTS_HALF |
4041                    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4042         /* Strip CRC bytes. */
4043         rctl |= E1000_RCTL_SECRC;
4044         /* Make sure VLAN Filters are off */
4045         rctl &= ~E1000_RCTL_VFE;
4046         /* Don't store bad packets */
4047         rctl &= ~E1000_RCTL_SBP;
4048
4049         /* Enable Receives */
4050         E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4051
4052         /*
4053          * Setup the HW Rx Head and Tail Descriptor Pointers
4054          *   - needs to be after enable
4055          */
4056         for (int i = 0; i < adapter->num_queues; i++) {
4057                 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4058                 E1000_WRITE_REG(hw, E1000_RDT(i),
4059                      adapter->num_rx_desc - 1);
4060         }
4061         return;
4062 }
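
/*
 * The RSS redirection table above is written four entries at a time:
 * entry i occupies byte (i & 3) of RETA register (i >> 2).  A hedged
 * sketch of one register's worth of round-robin queue indices,
 * assuming little-endian byte packing and ignoring the 82575 shift
 * (illustrative only, kept out of the build):
 */
#if 0
static u32
reta_dword_sketch(int base, int num_queues)
{
	u32 dword = 0;
	int n;

	for (n = 0; n < 4; n++)
		dword |= (u32)((base + n) % num_queues) << (n * 8);
	return (dword);
}
#endif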
4063
4064 /*********************************************************************
4065  *
4066  *  Free receive rings.
4067  *
4068  **********************************************************************/
4069 static void
4070 igb_free_receive_structures(struct adapter *adapter)
4071 {
4072         struct rx_ring *rxr = adapter->rx_rings;
4073
4074         for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4075 #ifdef NET_LRO 
4076                 struct lro_ctrl *lro = &rxr->lro;
4077 #endif
4078                 igb_free_receive_buffers(rxr);
4079 #ifdef NET_LRO
4080                 tcp_lro_free(lro);
4081 #endif
4082                 igb_dma_free(adapter, &rxr->rxdma);
4083         }
4084
4085         kfree(adapter->rx_rings, M_DEVBUF);
4086 }
4087
4088 /*********************************************************************
4089  *
4090  *  Free receive ring data structures.
4091  *
4092  **********************************************************************/
4093 static void
4094 igb_free_receive_buffers(struct rx_ring *rxr)
4095 {
4096         struct adapter          *adapter = rxr->adapter;
4097         struct igb_rx_buf       *rxbuf;
4098         int i;
4099
4100         INIT_DEBUGOUT("free_receive_structures: begin");
4101
4102         if (rxr->rx_hspare_map != NULL) {
4103                 bus_dmamap_destroy(rxr->rx_htag, rxr->rx_hspare_map);
4104                 rxr->rx_hspare_map = NULL;
4105         }
4106
4107         if (rxr->rx_pspare_map != NULL) {
4108                 bus_dmamap_destroy(rxr->rx_ptag, rxr->rx_pspare_map);
4109                 rxr->rx_pspare_map = NULL;
4110         }
4111
4112         /* Cleanup any existing buffers */
4113         if (rxr->rx_buffers != NULL) {
4114                 for (i = 0; i < adapter->num_rx_desc; i++) {
4115                         rxbuf = &rxr->rx_buffers[i];
4116                         if (rxbuf->m_head != NULL) {
4117                                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
4118                                     BUS_DMASYNC_POSTREAD);
4119                                 bus_dmamap_unload(rxr->rx_htag,
4120                                     rxbuf->head_map);
4121                                 rxbuf->m_head->m_flags |= M_PKTHDR;
4122                                 m_freem(rxbuf->m_head);
4123                         }
4124                         if (rxbuf->m_pack != NULL) {
4125                                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
4126                                     BUS_DMASYNC_POSTREAD);
4127                                 bus_dmamap_unload(rxr->rx_ptag,
4128                                     rxbuf->pack_map);
4129                                 rxbuf->m_pack->m_flags |= M_PKTHDR;
4130                                 m_freem(rxbuf->m_pack);
4131                         }
4132                         rxbuf->m_head = NULL;
4133                         rxbuf->m_pack = NULL;
4134                         if (rxbuf->head_map != NULL) {
4135                                 bus_dmamap_destroy(rxr->rx_htag,
4136                                     rxbuf->head_map);
4137                                 rxbuf->head_map = NULL;
4138                         }
4139                         if (rxbuf->pack_map != NULL) {
4140                                 bus_dmamap_destroy(rxr->rx_ptag,
4141                                     rxbuf->pack_map);
4142                                 rxbuf->pack_map = NULL;
4143                         }
4144                 }
4145                 if (rxr->rx_buffers != NULL) {
4146                         kfree(rxr->rx_buffers, M_DEVBUF);
4147                         rxr->rx_buffers = NULL;
4148                 }
4149         }
4150
4151         if (rxr->rx_htag != NULL) {
4152                 bus_dma_tag_destroy(rxr->rx_htag);
4153                 rxr->rx_htag = NULL;
4154         }
4155         if (rxr->rx_ptag != NULL) {
4156                 bus_dma_tag_destroy(rxr->rx_ptag);
4157                 rxr->rx_ptag = NULL;
4158         }
4159 }
4160
4161 static __inline void
4162 igb_rx_discard(struct rx_ring *rxr, union e1000_adv_rx_desc *cur, int i)
4163 {
4164
4165         if (rxr->fmp != NULL) {
4166                 rxr->fmp->m_flags |= M_PKTHDR;
4167                 m_freem(rxr->fmp);
4168                 rxr->fmp = NULL;
4169                 rxr->lmp = NULL;
4170         }
4171 }
4172
4173 static __inline void
4174 igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4175 {
4176
4177         /*
4178          * At the moment LRO is only for IPv4/TCP packets whose TCP
4179          * checksum has been verified by hardware, and the Ethernet
4180          * header must not carry a VLAN tag.
4181          */
4182 #ifdef NET_LRO
4183         if (rxr->lro_enabled &&
4184             (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4185             (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4186             (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4187             (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4188             (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 
4189             (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4190                 /*
4191                  * Send to the stack if:
4192                  *  - LRO not enabled, or
4193                  *  - no LRO resources, or
4194                  *  - lro enqueue fails
4195                  */
4196                 if (rxr->lro.lro_cnt != 0)
4197                         if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4198                                 return;
4199         }
4200 #endif
4201         (*ifp->if_input)(ifp, m);
4202 }

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has been
 *  DMA'd into host memory to the upper layer.
 *
 *  We loop at most count times if count is > 0; a non-positive
 *  count cleans nothing.
 *
 *  Return TRUE if more to clean, FALSE otherwise
 *********************************************************************/
static bool
igb_rxeof(struct rx_ring *rxr, int count)
{
        struct adapter          *adapter = rxr->adapter;
        struct ifnet            *ifp = adapter->ifp;
#ifdef NET_LRO
        struct lro_ctrl         *lro = &rxr->lro;
        struct lro_entry        *queued;
#endif
        int                     i, prog = 0;
        u32                     ptype, staterr = 0;
        union e1000_adv_rx_desc *cur;

        IGB_RX_LOCK(rxr);

        /* Main clean loop */
        for (i = rxr->next_to_check; count > 0; prog++) {
                struct mbuf *sendmp, *mh, *mp;
                u16 hlen, plen, hdr, vtag;
                bool eop = FALSE;
                u8 dopayload;

                /* Sync the ring. */
                bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
                    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
                cur = &rxr->rx_base[i];
                staterr = le32toh(cur->wb.upper.status_error);
                if ((staterr & E1000_RXD_STAT_DD) == 0)
                        break;
                if ((ifp->if_flags & IFF_RUNNING) == 0)
                        break;
                count--;
                sendmp = mh = mp = NULL;
                cur->wb.upper.status_error = 0;
                plen = le16toh(cur->wb.upper.length);
                ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
                hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
                eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);

                /* Make sure all segments of a bad packet are discarded */
                if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
                    (rxr->discard)) {
                        ifp->if_ierrors++;
                        ++rxr->rx_discarded;
                        if (!eop) /* Catch subsequent segs */
                                rxr->discard = TRUE;
                        else
                                rxr->discard = FALSE;
                        igb_rx_discard(rxr, cur, i);
                        goto next_desc;
                }

                /*
                ** The way the hardware is configured to
                ** split, it will ONLY use the header buffer
                ** when header split is enabled, otherwise we
                ** get normal behavior, i.e., both header and
                ** payload are DMA'd into the payload buffer.
                **
                ** The fmp test is to catch the case where a
                ** packet spans multiple descriptors, in that
                ** case only the first header is valid.
                */
                if (rxr->hdr_split && rxr->fmp == NULL) {
                        hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
                            E1000_RXDADV_HDRBUFLEN_SHIFT;
                        if (hlen > IGB_HDR_BUF)
                                hlen = IGB_HDR_BUF;
                        /* Handle the header mbuf */
                        mh = rxr->rx_buffers[i].m_head;
                        mh->m_len = hlen;
                        dopayload = IGB_CLEAN_HEADER;
                        /*
                        ** Get the payload length, this
                        ** could be zero if it's a small
                        ** packet.
                        */
                        if (plen > 0) {
                                mp = rxr->rx_buffers[i].m_pack;
                                mp->m_len = plen;
                                mh->m_next = mp;
                                dopayload = IGB_CLEAN_BOTH;
                                rxr->rx_split_packets++;
                        }
                } else {
                        /*
                        ** Either no header split, or a
                        ** secondary piece of a fragmented
                        ** split packet.
                        */
                        mh = rxr->rx_buffers[i].m_pack;
                        mh->m_len = plen;
                        dopayload = IGB_CLEAN_PAYLOAD;
                }

                /*
                ** get_buf will overwrite the writeback
                ** descriptor so save the VLAN tag now.
                */
                vtag = le16toh(cur->wb.upper.vlan);
                if (igb_get_buf(rxr, i, dopayload) != 0) {
                        ifp->if_iqdrops++;
                        /*
                         * We've dropped a frame due to lack of resources
                         * so we should drop the entire multi-segmented
                         * frame until we encounter EOP.
                         */
                        if ((staterr & E1000_RXD_STAT_EOP) == 0)
                                rxr->discard = TRUE;
                        igb_rx_discard(rxr, cur, i);
                        goto next_desc;
                }

                /* Initial frame - setup */
                if (rxr->fmp == NULL) {
                        mh->m_pkthdr.len = mh->m_len;
                        /* Store the first mbuf */
                        rxr->fmp = mh;
                        rxr->lmp = mh;
                        if (mp != NULL) {
                                /* Add payload if split */
                                mh->m_pkthdr.len += mp->m_len;
                                rxr->lmp = mh->m_next;
                        }
                } else {
                        /* Chain mbufs together */
                        rxr->lmp->m_next = mh;
                        rxr->lmp = rxr->lmp->m_next;
                        rxr->fmp->m_pkthdr.len += mh->m_len;
                }

                if (eop) {
                        rxr->fmp->m_pkthdr.rcvif = ifp;
                        ifp->if_ipackets++;
                        rxr->rx_packets++;
                        /* capture data for AIM */
                        rxr->packets++;
                        rxr->bytes += rxr->fmp->m_pkthdr.len;
                        rxr->rx_bytes += rxr->fmp->m_pkthdr.len;

                        if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
                                igb_rx_checksum(staterr, rxr->fmp, ptype);
                        /* XXX igb(4) always strips VLAN. */
                        if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
                            (staterr & E1000_RXD_STAT_VP) != 0) {
                                rxr->fmp->m_pkthdr.ether_vlantag = vtag;
                                rxr->fmp->m_flags |= M_VLANTAG;
                        }
#if __FreeBSD_version >= 800000
                        rxr->fmp->m_pkthdr.flowid = curcpu;
                        rxr->fmp->m_flags |= M_FLOWID;
#endif
                        sendmp = rxr->fmp;
                        /* Make sure to set M_PKTHDR. */
                        sendmp->m_flags |= M_PKTHDR;
                        rxr->fmp = NULL;
                        rxr->lmp = NULL;
                }

next_desc:
                bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
                    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

                rxr->last_cleaned = i; /* For updating tail */

                /* Advance our pointers to the next descriptor. */
                if (++i == adapter->num_rx_desc)
                        i = 0;

                /*
                ** Note that we hold the RX lock through
                ** the following call, so this ring's
                ** next_to_check will not change.
                */
                if (sendmp != NULL)
                        igb_rx_input(rxr, ifp, sendmp, ptype);
        }

        if (prog == 0) {
                IGB_RX_UNLOCK(rxr);
                return (FALSE);
        }

        rxr->next_to_check = i;

        /* Advance the E1000's Receive Queue "Tail Pointer". */
        E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);

        /*
         * Flush any outstanding LRO work
         */
#ifdef NET_LRO
        while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
                SLIST_REMOVE_HEAD(&lro->lro_active, next);
                tcp_lro_flush(lro, queued);
        }
#endif

        IGB_RX_UNLOCK(rxr);

        /*
        ** If we still have cleaning to do,
        ** schedule another interrupt.
        */
        if ((staterr & E1000_RXD_STAT_DD) != 0)
                return (TRUE);

        return (FALSE);
}
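
/*
 * Usage sketch (hypothetical caller, not compiled): because igb_rxeof()
 * returns TRUE while unprocessed descriptors remain, an interrupt or
 * poll handler can drain the ring in bounded batches:
 */
#if 0
        while (igb_rxeof(rxr, 100))     /* 100 is an arbitrary budget */
                ;                       /* or reschedule a task instead */
#endif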

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it again.
 *
 *********************************************************************/
static void
igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
{
        u16 status = (u16)staterr;
        u8  errors = (u8) (staterr >> 24);
        int sctp;

        /* The Ignore Checksum Indication bit is set */
        if (status & E1000_RXD_STAT_IXSM) {
                mp->m_pkthdr.csum_flags = 0;
                return;
        }

        if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
            (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
                sctp = 1;
        else
                sctp = 0;
        if (status & E1000_RXD_STAT_IPCS) {
                /* Did it pass? */
                if (!(errors & E1000_RXD_ERR_IPE)) {
                        /* IP Checksum Good */
                        mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
                        mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
                } else
                        mp->m_pkthdr.csum_flags = 0;
        }

        if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
                u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
                if (sctp) /* reassign */
                        type = CSUM_SCTP_VALID;
#endif
                /* Did it pass? */
                if (!(errors & E1000_RXD_ERR_TCPE)) {
                        mp->m_pkthdr.csum_flags |= type;
                        if (sctp == 0)
                                mp->m_pkthdr.csum_data = htons(0xffff);
                }
        }
        return;
}
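
/*
 * Worked example (values illustrative) of the staterr split used above:
 * igb_rx_checksum() treats the low 16 bits as the status word and bits
 * 24-31 as the error byte.
 */
#if 0
        u32 staterr = (E1000_RXD_ERR_TCPE << 24) |      /* error byte */
            E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_DD;   /* status bits */
        u16 status = (u16)staterr;              /* DD | TCPCS */
        u8  errors = (u8)(staterr >> 24);       /* TCPE => checksum failed */
#endif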

/*
 * This routine is run via a vlan
 * config EVENT
 */
static void
igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
        struct adapter  *adapter = ifp->if_softc;
        u32             index, bit;

        if (ifp->if_softc != arg)   /* Not our event */
                return;

        if ((vtag == 0) || (vtag > 4095))       /* Invalid */
                return;

        index = (vtag >> 5) & 0x7F;
        bit = vtag & 0x1F;
        igb_shadow_vfta[index] |= (1 << bit);
        ++adapter->num_vlans;
        /* Re-init to load the changes */
        igb_init(adapter);
}
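
/*
 * Worked example of the VFTA indexing above: the 4096-bit VLAN filter
 * table is 128 32-bit words, so a tag selects word (vtag >> 5) and bit
 * (vtag & 0x1F) within it.
 */
#if 0
        u16 vtag = 100;                         /* example tag, 0x064 */
        KKASSERT(((vtag >> 5) & 0x7F) == 3);    /* VFTA word index */
        KKASSERT((vtag & 0x1F) == 4);           /* bit within that word */
#endif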

/*
 * This routine is run via a vlan
 * unconfig EVENT
 */
static void
igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
{
        struct adapter  *adapter = ifp->if_softc;
        u32             index, bit;

        if (ifp->if_softc != arg)
                return;

        if ((vtag == 0) || (vtag > 4095))       /* Invalid */
                return;

        index = (vtag >> 5) & 0x7F;
        bit = vtag & 0x1F;
        igb_shadow_vfta[index] &= ~(1 << bit);
        --adapter->num_vlans;
        /* Re-init to load the changes */
        igb_init(adapter);
}

static void
igb_setup_vlan_hw_support(struct adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32             reg;

        /*
        ** We get here thru init_locked, meaning
        ** a soft reset; this has already cleared
        ** the VFTA and other state, so if no
        ** VLANs have been registered do nothing.
        */
        if (adapter->num_vlans == 0)
                return;

        /*
        ** A soft reset zeroes out the VFTA, so
        ** we need to repopulate it now.
        */
        for (int i = 0; i < IGB_VFTA_SIZE; i++)
                if (igb_shadow_vfta[i] != 0)
                        E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
                            i, igb_shadow_vfta[i]);

        reg = E1000_READ_REG(hw, E1000_CTRL);
        reg |= E1000_CTRL_VME;
        E1000_WRITE_REG(hw, E1000_CTRL, reg);

        /* Enable the Filter Table */
        reg = E1000_READ_REG(hw, E1000_RCTL);
        reg &= ~E1000_RCTL_CFIEN;
        reg |= E1000_RCTL_VFE;
        E1000_WRITE_REG(hw, E1000_RCTL, reg);

        /* Update the frame size */
        E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
            adapter->max_frame_size + VLAN_TAG_SIZE);
}

static void
igb_enable_intr(struct adapter *adapter)
{
        /* With RSS set up, program what to auto-clear */
        if (adapter->msix_mem) {
                E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
                    adapter->eims_mask);
                E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
                    adapter->eims_mask);
                E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
                    adapter->eims_mask);
                E1000_WRITE_REG(&adapter->hw, E1000_IMS,
                    E1000_IMS_LSC);
        } else {
                E1000_WRITE_REG(&adapter->hw, E1000_IMS,
                    IMS_ENABLE_MASK);
        }
        E1000_WRITE_FLUSH(&adapter->hw);
}

static void
igb_disable_intr(struct adapter *adapter)
{
        if (adapter->msix_mem) {
                E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
                E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
        }
        E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
        E1000_WRITE_FLUSH(&adapter->hw);
}
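
/*
 * Usage sketch (hypothetical call site): the pair above brackets
 * reconfiguration so no interrupt fires while ring or register state
 * is inconsistent:
 */
#if 0
        igb_disable_intr(adapter);
        /* ... reprogram rings/registers under the adapter lock ... */
        igb_enable_intr(adapter);
#endif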

/*
 * Bit of a misnomer: what this really means is
 * to enable OS management of the system, i.e.,
 * to disable special hardware management features.
 */
static void
igb_init_manageability(struct adapter *adapter)
{
        if (adapter->has_manage) {
                int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
                int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

                /* disable hardware interception of ARP */
                manc &= ~(E1000_MANC_ARP_EN);

                /* enable receiving management packets to the host */
                manc |= E1000_MANC_EN_MNG2HOST;
                manc2h |= 1 << 5;  /* Mng Port 623 */
                manc2h |= 1 << 6;  /* Mng Port 664 */
                E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
                E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
        }
}
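
/*
 * Sketch (assumption: each MANC2H bit enables host delivery for one
 * pre-programmed management filter, as the port 623/664 comments above
 * suggest). Forwarding a further filter slot would follow the same
 * read-modify-write pattern:
 */
#if 0
        manc2h |= 1 << 7;       /* hypothetical: forward filter slot 7 */
        E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
#endif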

/*
 * Give control back to the hardware management
 * controller if there is one.
 */
static void
igb_release_manageability(struct adapter *adapter)
{
        if (adapter->has_manage) {
                int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);

                /* re-enable hardware interception of ARP */
                manc |= E1000_MANC_ARP_EN;
                manc &= ~E1000_MANC_EN_MNG2HOST;

                E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
        }
}

/*
 * igb_get_hw_control sets the CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 */
static void
igb_get_hw_control(struct adapter *adapter)
{
        u32 ctrl_ext;

        /* Let firmware know the driver has taken over */
        ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
        E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
            ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/*
 * igb_release_hw_control resets the CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 */
static void
igb_release_hw_control(struct adapter *adapter)
{
        u32 ctrl_ext;

        /* Let firmware take over control of the h/w */
        ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
        E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
            ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}
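
/*
 * Usage sketch (hypothetical call sites): the DRV_LOAD handshake
 * brackets the driver's ownership of the hardware, so attach/resume
 * take control and detach/suspend give it back:
 */
#if 0
        igb_get_hw_control(adapter);            /* attach/init path */
        /* ... driver owns the hardware ... */
        igb_release_hw_control(adapter);        /* detach/shutdown path */
#endif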

static int
igb_is_valid_ether_addr(uint8_t *addr)
{
        char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };

        if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
                return (FALSE);
        }

        return (TRUE);
}
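
/*
 * Worked example: addr[0] & 1 tests the I/G (multicast) bit of the
 * first octet, so 01:00:5e:00:00:01 is rejected along with the
 * all-zero address, while ordinary unicast addresses pass.
 */
#if 0
        uint8_t mcast[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
        KKASSERT(igb_is_valid_ether_addr(mcast) == FALSE);
#endif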


/*
 * Enable PCI Wake On Lan capability
 */
void
igb_enable_wakeup(device_t dev)
{
        u16     cap, status;
        u8      id;

        /* First find the capabilities pointer */
        cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
        /* Read the PM Capabilities */
        id = pci_read_config(dev, cap, 1);
        if (id != PCIY_PMG)     /* Something wrong */
                return;
        /*
         * OK, we have the power capabilities, so
         * now get the status register.
         */
        cap += PCIR_POWER_STATUS;
        status = pci_read_config(dev, cap, 2);
        status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
        pci_write_config(dev, cap, status, 2);
}
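
/*
 * Note that the routine above assumes power management is the first
 * entry in the PCI capability list. A defensive variant would walk the
 * list; a minimal sketch, assuming the usual pcireg.h offset macros
 * (helper name hypothetical):
 */
#if 0
static u8
igb_find_pmcap(device_t dev)
{
        u8 ptr = pci_read_config(dev, PCIR_CAP_PTR, 1);

        while (ptr != 0) {
                if (pci_read_config(dev, ptr + PCICAP_ID, 1) == PCIY_PMG)
                        return (ptr);
                ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
        }
        return (0);
}
#endif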


/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 **********************************************************************/
static void
igb_update_stats_counters(struct adapter *adapter)
{
        struct ifnet   *ifp;

        if (adapter->hw.phy.media_type == e1000_media_type_copper ||
            (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
                adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
                adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
        }
        adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
        adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
        adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
        adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

        adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
        adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
        adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
        adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
        adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
        adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
        adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
        adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
        adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
        adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
        adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
        adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
        adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
        adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
        adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
        adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
        adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
        adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
        adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
        adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

        /*
         * For the 64-bit byte counters the low dword must be read first;
         * both registers clear on the read of the high dword.
         */
        adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
        adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);

        adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
        adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
        adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
        adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
        adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

        adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
        adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

        adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
        adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
        adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
        adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
        adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
        adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
        adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
        adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
        adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
        adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

        adapter->stats.algnerrc +=
                E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
        adapter->stats.rxerrc +=
                E1000_READ_REG(&adapter->hw, E1000_RXERRC);
        adapter->stats.tncrs +=
                E1000_READ_REG(&adapter->hw, E1000_TNCRS);
        adapter->stats.cexterr +=
                E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
        adapter->stats.tsctc +=
                E1000_READ_REG(&adapter->hw, E1000_TSCTC);
        adapter->stats.tsctfc +=
                E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
        ifp = adapter->ifp;

        ifp->if_collisions = adapter->stats.colc;

        /* Rx Errors */
        ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
            adapter->stats.crcerrs + adapter->stats.algnerrc +
            adapter->stats.ruc + adapter->stats.roc +
            adapter->stats.mpc + adapter->stats.cexterr;

        /* Tx Errors */
        ifp->if_oerrors = adapter->stats.ecol +
            adapter->stats.latecol + adapter->watchdog_events;
}
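
/*
 * Sketch of the read order the 64-bit counter comment above describes
 * (illustrative only; the code accumulates just the high dwords):
 */
#if 0
        u64 gorc;

        gorc = E1000_READ_REG(&adapter->hw, E1000_GORCL);       /* low first */
        gorc |= (u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32;
        adapter->stats.gorc += gorc;    /* the pair clears on the high read */
#endif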


/**********************************************************************
 *
 *  This routine is called only when igb_display_debug_stats is enabled.
 *  It provides a way to take a look at important statistics
 *  maintained by the driver and hardware.
 *
 **********************************************************************/
static void
igb_print_debug_info(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        struct igb_queue *que = adapter->queues;
        struct rx_ring *rxr = adapter->rx_rings;
        struct tx_ring *txr = adapter->tx_rings;
        uint8_t *hw_addr = adapter->hw.hw_addr;

        device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
        device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
            E1000_READ_REG(&adapter->hw, E1000_CTRL),
            E1000_READ_REG(&adapter->hw, E1000_RCTL));

#if     (DEBUG_HW > 0)  /* Don't output these errors normally */
        device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
            E1000_READ_REG(&adapter->hw, E1000_IMS),
            E1000_READ_REG(&adapter->hw, E1000_EIMS));
#endif

        device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
            ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
            (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
        device_printf(dev, "Flow control watermarks high = %d low = %d\n",
            adapter->hw.fc.high_water,
            adapter->hw.fc.low_water);

        for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
                device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
                    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
                    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
                device_printf(dev, "rdh = %d, rdt = %d\n",
                    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
                    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
                device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
                    txr->me, (long long)txr->no_desc_avail);
                device_printf(dev, "TX(%d) Packets sent = %lld\n",
                    txr->me, (long long)txr->tx_packets);
                device_printf(dev, "RX(%d) Packets received = %lld  ",
                    rxr->me, (long long)rxr->rx_packets);
        }

        /* Rewind to the first ring; the loop above advanced rxr past it. */
        rxr = adapter->rx_rings;
        for (int i = 0; i < adapter->num_queues; i++, rxr++) {
#ifdef NET_LRO
                struct lro_ctrl *lro = &rxr->lro;
#endif
                device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
                    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
                    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
                device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
                    (long long)rxr->rx_packets);
                device_printf(dev, " Split Packets = %lld ",
                    (long long)rxr->rx_split_packets);
                device_printf(dev, " Byte count = %lld\n",
                    (long long)rxr->rx_bytes);
#ifdef NET_LRO
                device_printf(dev, "RX(%d) LRO Queued= %d  ",
                    i, lro->lro_queued);
                device_printf(dev, "LRO Flushed= %d\n", lro->lro_flushed);
#endif
        }

        for (int i = 0; i < adapter->num_queues; i++, que++)
                device_printf(dev, "QUE(%d) IRQs = %llx\n",
                    i, (long long)que->irqs);

        device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
        device_printf(dev, "Mbuf defrag failed = %ld\n",
            adapter->mbuf_defrag_failed);
        device_printf(dev, "Std mbuf header failed = %ld\n",
            adapter->mbuf_header_failed);
        device_printf(dev, "Std mbuf packet failed = %ld\n",
            adapter->mbuf_packet_failed);
        device_printf(dev, "Driver dropped packets = %ld\n",
            adapter->dropped_pkts);
        device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
            adapter->no_tx_dma_setup);
}

static void
igb_print_hw_stats(struct adapter *adapter)
{
        device_t dev = adapter->dev;

        device_printf(dev, "Excessive collisions = %lld\n",
            (long long)adapter->stats.ecol);
#if     (DEBUG_HW > 0)  /* Don't output these errors normally */
        device_printf(dev, "Symbol errors = %lld\n",
            (long long)adapter->stats.symerrs);
#endif
        device_printf(dev, "Sequence errors = %lld\n",
            (long long)adapter->stats.sec);
        device_printf(dev, "Defer count = %lld\n",
            (long long)adapter->stats.dc);
        device_printf(dev, "Missed Packets = %lld\n",
            (long long)adapter->stats.mpc);
        device_printf(dev, "Receive No Buffers = %lld\n",
            (long long)adapter->stats.rnbc);
        /* RLEC is inaccurate on some hardware, calculate our own. */
        device_printf(dev, "Receive Length Errors = %lld\n",
            ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
        device_printf(dev, "Receive errors = %lld\n",
            (long long)adapter->stats.rxerrc);
        device_printf(dev, "Crc errors = %lld\n",
            (long long)adapter->stats.crcerrs);
        device_printf(dev, "Alignment errors = %lld\n",
            (long long)adapter->stats.algnerrc);
        /* On 82575 these are collision counts */
        device_printf(dev, "Collision/Carrier extension errors = %lld\n",
            (long long)adapter->stats.cexterr);
        device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
        device_printf(dev, "watchdog timeouts = %ld\n",
            adapter->watchdog_events);
        device_printf(dev, "XON Rcvd = %lld\n",
            (long long)adapter->stats.xonrxc);
        device_printf(dev, "XON Xmtd = %lld\n",
            (long long)adapter->stats.xontxc);
        device_printf(dev, "XOFF Rcvd = %lld\n",
            (long long)adapter->stats.xoffrxc);
        device_printf(dev, "XOFF Xmtd = %lld\n",
            (long long)adapter->stats.xofftxc);
        device_printf(dev, "Good Packets Rcvd = %lld\n",
            (long long)adapter->stats.gprc);
        device_printf(dev, "Good Packets Xmtd = %lld\n",
            (long long)adapter->stats.gptc);
        device_printf(dev, "TSO Contexts Xmtd = %lld\n",
            (long long)adapter->stats.tsctc);
        device_printf(dev, "TSO Contexts Failed = %lld\n",
            (long long)adapter->stats.tsctfc);
}

/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. This only dumps the first
 *  32 words; the stuff that matters is in that extent.
 *
 **********************************************************************/
static void
igb_print_nvm_info(struct adapter *adapter)
{
        u16     eeprom_data;
        int     i, j, row = 0;

        /* It's a bit crude, but it gets the job done */
        kprintf("\nInterface EEPROM Dump:\n");
        kprintf("Offset\n0x0000  ");
        for (i = 0, j = 0; i < 32; i++, j++) {
                if (j == 8) { /* Make the offset block */
                        j = 0; ++row;
                        kprintf("\n0x00%x0  ", row);
                }
                e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
                kprintf("%04x ", eeprom_data);
        }
        kprintf("\n");
}
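
/*
 * Worked example of the offset labels above: a new row starts every 8
 * words, and kprintf("\n0x00%x0  ", row) prints the row number in hex
 * followed by a literal '0', so rows 1..3 are labelled 0x0010, 0x0020
 * and 0x0030 (word offsets 8, 16 and 24). The format only holds for
 * row <= 0xf, which is fine for this 32-word dump.
 */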

static int
igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        if (result == 1) {
                adapter = (struct adapter *)arg1;
                igb_print_debug_info(adapter);
        }
        /*
         * A value of 2 causes a hex dump of the
         * first 32 16-bit words of the EEPROM to
         * the screen.
         */
        if (result == 2) {
                adapter = (struct adapter *)arg1;
                igb_print_nvm_info(adapter);
        }

        return (error);
}


static int
igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
        struct adapter *adapter;
        int error;
        int result;

        result = -1;
        error = sysctl_handle_int(oidp, &result, 0, req);

        if (error || !req->newptr)
                return (error);

        if (result == 1) {
                adapter = (struct adapter *)arg1;
                igb_print_hw_stats(adapter);
        }

        return (error);
}

static void
igb_add_rx_process_limit(struct adapter *adapter, const char *name,
        const char *description, int *limit, int value)
{
        *limit = value;
        SYSCTL_ADD_INT(&adapter->sysctl_ctx,
            SYSCTL_CHILDREN(adapter->sysctl_tree),
            OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
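
/*
 * Usage sketch (hypothetical values; the real call site lives in the
 * attach path): registers a read/write tunable and seeds its default.
 */
#if 0
        igb_add_rx_process_limit(adapter, "rx_processing_limit",
            "max number of rx packets to process",
            &adapter->rx_process_limit, 100);
#endif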