kernel - callout_init() -> callout_init_mp() in selected cases
[dragonfly.git] / sys / dev / netif / e1000 / if_igb.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2010, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33
34
35 #include "opt_polling.h"
36 #include "opt_inet.h"
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #if __FreeBSD_version >= 800000
41 #include <sys/buf_ring.h>
42 #endif
43 #include <sys/bus.h>
44 #include <sys/endian.h>
45 #include <sys/lock.h>
46 #include <sys/kernel.h>
47 #include <sys/kthread.h>
48 #include <sys/malloc.h>
49 #include <sys/mbuf.h>
50 #include <sys/module.h>
51 #include <sys/rman.h>
52 #include <sys/socket.h>
53 #include <sys/sockio.h>
54 #include <sys/sysctl.h>
55 #include <sys/taskqueue.h>
56 #include <sys/eventhandler.h>
57
58 #ifdef IGB_IEEE1588
59 #include <sys/ieee1588.h>
60 #endif
61
62 #include <net/bpf.h>
63 #include <net/ethernet.h>
64 #include <net/if.h>
65 #include <net/if_arp.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 #include <net/ifq_var.h>
69
70 #include <net/if_types.h>
71 #include <net/vlan/if_vlan_var.h>
72 #include <net/vlan/if_vlan_ether.h>
73
74 #include <netinet/in_systm.h>
75 #include <netinet/in.h>
76 #include <netinet/if_ether.h>
77 #include <netinet/ip.h>
78 #include <netinet/ip6.h>
79 #include <netinet/tcp.h>
80 #ifdef NET_LRO
81 #include <netinet/tcp_lro.h>
82 #endif
83 #include <netinet/udp.h>
84
85 #include <sys/in_cksum.h>
86 #include <bus/pci/pcivar.h>
87 #include <bus/pci/pcireg.h>
88
89 #include "e1000_api.h"
90 #include "e1000_82575.h"
91 #include "if_igb.h"
92 #include "ifcap_defines.h" // XXX
93
/*
 * Debug statistics switch: set to one to display debug statistics.
 */
int igb_display_debug_stats = 0;

/*
 * Driver version string; igb_probe() appends it to the device
 * description.
 */
char igb_driver_version[] = "version - 1.9.1";
103
104
105 /*********************************************************************
106  *  PCI Device ID Table
107  *
108  *  Used by probe to select devices to load on
109  *  Last field stores an index into e1000_strings
110  *  Last entry must be all 0s
111  *
112  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113  *********************************************************************/
114
115 static igb_vendor_info_t igb_vendor_info_array[] =
116 {
117         { 0x8086, E1000_DEV_ID_82575EB_COPPER,  PCI_ANY_ID, PCI_ANY_ID, 0},
118         { 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
120         { 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
122         { 0x8086, E1000_DEV_ID_82576,           PCI_ANY_ID, PCI_ANY_ID, 0},
123         { 0x8086, E1000_DEV_ID_82576_NS,        PCI_ANY_ID, PCI_ANY_ID, 0},
124         { 0x8086, E1000_DEV_ID_82576_NS_SERDES, PCI_ANY_ID, PCI_ANY_ID, 0},
125         { 0x8086, E1000_DEV_ID_82576_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
126         { 0x8086, E1000_DEV_ID_82576_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
127         { 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
129         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
131         { 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
132                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
133         { 0x8086, E1000_DEV_ID_82580_COPPER,    PCI_ANY_ID, PCI_ANY_ID, 0},
134         { 0x8086, E1000_DEV_ID_82580_FIBER,     PCI_ANY_ID, PCI_ANY_ID, 0},
135         { 0x8086, E1000_DEV_ID_82580_SERDES,    PCI_ANY_ID, PCI_ANY_ID, 0},
136         { 0x8086, E1000_DEV_ID_82580_SGMII,     PCI_ANY_ID, PCI_ANY_ID, 0},
137         { 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
138                                                 PCI_ANY_ID, PCI_ANY_ID, 0},
139         /* required last entry */
140         { 0, 0, 0, 0, 0}
141 };
142
/*
 * Branding strings for the supported NICs, selected by the string
 * index stored in each igb_vendor_info_array[] entry.
 */
static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};
150
151 /*********************************************************************
152  *  Function prototypes
153  *********************************************************************/
154 static int      igb_probe(device_t);
155 static int      igb_attach(device_t);
156 static int      igb_detach(device_t);
157 static int      igb_shutdown(device_t);
158 static int      igb_suspend(device_t);
159 static int      igb_resume(device_t);
160 static void     igb_start(struct ifnet *);
161 static void     igb_start_locked(struct tx_ring *, struct ifnet *ifp);
162 #if __FreeBSD_version >= 800000
163 static int      igb_mq_start(struct ifnet *, struct mbuf *);
164 static int      igb_mq_start_locked(struct ifnet *,
165                     struct tx_ring *, struct mbuf *);
166 static void     igb_qflush(struct ifnet *);
167 #endif
168 static int      igb_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
169 static void     igb_init(void *);
170 static void     igb_init_locked(struct adapter *);
171 static void     igb_stop(void *);
172 static void     igb_media_status(struct ifnet *, struct ifmediareq *);
173 static int      igb_media_change(struct ifnet *);
174 static void     igb_identify_hardware(struct adapter *);
175 static int      igb_allocate_pci_resources(struct adapter *);
176 static int      igb_allocate_msix(struct adapter *);
177 static int      igb_allocate_legacy(struct adapter *);
178 static int      igb_setup_msix(struct adapter *);
179 static void     igb_free_pci_resources(struct adapter *);
180 static void     igb_local_timer(void *);
181 static void     igb_reset(struct adapter *);
182 static void     igb_setup_interface(device_t, struct adapter *);
183 static int      igb_allocate_queues(struct adapter *);
184 static void     igb_configure_queues(struct adapter *);
185
186 static int      igb_allocate_transmit_buffers(struct tx_ring *);
187 static void     igb_setup_transmit_structures(struct adapter *);
188 static void     igb_setup_transmit_ring(struct tx_ring *);
189 static void     igb_initialize_transmit_units(struct adapter *);
190 static void     igb_free_transmit_structures(struct adapter *);
191 static void     igb_free_transmit_buffers(struct tx_ring *);
192
193 static int      igb_allocate_receive_buffers(struct rx_ring *);
194 static int      igb_setup_receive_structures(struct adapter *);
195 static int      igb_setup_receive_ring(struct rx_ring *);
196 static void     igb_initialize_receive_units(struct adapter *);
197 static void     igb_free_receive_structures(struct adapter *);
198 static void     igb_free_receive_buffers(struct rx_ring *);
199 static void     igb_free_receive_ring(struct rx_ring *);
200
201 static void     igb_enable_intr(struct adapter *);
202 static void     igb_disable_intr(struct adapter *);
203 static void     igb_update_stats_counters(struct adapter *);
204 static bool     igb_txeof(struct tx_ring *);
205
206 static __inline void igb_rx_discard(struct rx_ring *,
207                     union e1000_adv_rx_desc *, int);
208 static __inline void igb_rx_input(struct rx_ring *,
209                     struct ifnet *, struct mbuf *, u32);
210
211 static bool     igb_rxeof(struct rx_ring *, int);
212 static void     igb_rx_checksum(u32, struct mbuf *, u32);
213 static int      igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
214 #if NET_TSO 
215 static bool     igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
216 #endif
217 static void     igb_set_promisc(struct adapter *);
218 static void     igb_disable_promisc(struct adapter *);
219 static void     igb_set_multi(struct adapter *);
220 static void     igb_print_hw_stats(struct adapter *);
221 static void     igb_update_link_status(struct adapter *);
222 static int      igb_get_buf(struct rx_ring *, int, u8);
223
224 static void     igb_register_vlan(void *, struct ifnet *, u16);
225 static void     igb_unregister_vlan(void *, struct ifnet *, u16);
226 static void     igb_setup_vlan_hw_support(struct adapter *);
227
228 static int      igb_xmit(struct tx_ring *, struct mbuf **);
229 static int      igb_dma_malloc(struct adapter *, bus_size_t,
230                     struct igb_dma_alloc *, int);
231 static void     igb_dma_free(struct adapter *, struct igb_dma_alloc *);
232 static void     igb_print_debug_info(struct adapter *);
233 static void     igb_print_nvm_info(struct adapter *);
234 static int      igb_is_valid_ether_addr(u8 *);
235 static int      igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
236 static int      igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
237 /* Management and WOL Support */
238 static void     igb_init_manageability(struct adapter *);
239 static void     igb_release_manageability(struct adapter *);
240 static void     igb_get_hw_control(struct adapter *);
241 static void     igb_release_hw_control(struct adapter *);
242 static void     igb_enable_wakeup(device_t);
243
244 static void     igb_irq_fast(void *);
245 static void     igb_add_rx_process_limit(struct adapter *, const char *,
246                     const char *, int *, int);
247 static void     igb_handle_rxtx(void *context, int pending);
248 static void     igb_handle_que(void *context, int pending);
249 static void     igb_handle_link(void *context, int pending);
250
251 /* These are MSIX only irq handlers */
252 static void     igb_msix_que(void *);
253 static void     igb_msix_link(void *);
254
255 #ifdef DEVICE_POLLING
256 static poll_handler_t igb_poll;
257 #endif /* POLLING */
258
259 /*********************************************************************
260  *  FreeBSD Device Interface Entry Points
261  *********************************************************************/
262
263 static device_method_t igb_methods[] = {
264         /* Device interface */
265         DEVMETHOD(device_probe, igb_probe),
266         DEVMETHOD(device_attach, igb_attach),
267         DEVMETHOD(device_detach, igb_detach),
268         DEVMETHOD(device_shutdown, igb_shutdown),
269         DEVMETHOD(device_suspend, igb_suspend),
270         DEVMETHOD(device_resume, igb_resume),
271         {0, 0}
272 };
273
274 static driver_t igb_driver = {
275         "igb", igb_methods, sizeof(struct adapter),
276 };
277
278 static devclass_t igb_devclass;
279 DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, NULL, NULL);
280 MODULE_DEPEND(igb, pci, 1, 1, 1);
281 MODULE_DEPEND(igb, ether, 1, 1, 1);
282
283 /*********************************************************************
284  *  Tunable default values.
285  *********************************************************************/
286
287 /* Descriptor defaults */
288 static int igb_rxd = IGB_DEFAULT_RXD;
289 static int igb_txd = IGB_DEFAULT_TXD;
290 TUNABLE_INT("hw.igb.rxd", &igb_rxd);
291 TUNABLE_INT("hw.igb.txd", &igb_txd);
292
293 /*
294 ** AIM: Adaptive Interrupt Moderation
295 ** which means that the interrupt rate
296 ** is varied over time based on the
297 ** traffic for that interrupt vector
298 */
299 static int igb_enable_aim = TRUE;
300 TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
301
302 /*
303  * MSIX should be the default for best performance,
304  * but this allows it to be forced off for testing.
305  */         
306 static int igb_enable_msix = 0;
307 TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
308
309 /*
310  * Header split has seemed to be beneficial in
311  * many circumstances tested, however there have
312  * been some stability issues, so the default is
313  * off. 
314  */
315 static bool igb_header_split = FALSE;
316 TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
317
318 /*
319 ** This will autoconfigure based on
320 ** the number of CPUs if left at 0.
321 */
322 static int igb_num_queues = 0;
323 TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
324
325 /* How many packets rxeof tries to clean at a time */
326 static int igb_rx_process_limit = 100;
327 TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
328
329 /* Flow control setting - default to FULL */
330 static int igb_fc_setting = e1000_fc_full;
331 TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
332
333 /*
334 ** Shadow VFTA table, this is needed because
335 ** the real filter table gets cleared during
336 ** a soft reset and the driver needs to be able
337 ** to repopulate it.
338 */
339 static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
340
341
342 /*********************************************************************
343  *  Device identification routine
344  *
345  *  igb_probe determines if the driver should be loaded on
346  *  adapter based on PCI vendor/device id of the adapter.
347  *
348  *  return BUS_PROBE_DEFAULT on success, positive on failure
349  *********************************************************************/
350
351 static int
352 igb_probe(device_t dev)
353 {
354         char            adapter_name[60];
355         uint16_t        pci_vendor_id = 0;
356         uint16_t        pci_device_id = 0;
357         uint16_t        pci_subvendor_id = 0;
358         uint16_t        pci_subdevice_id = 0;
359         igb_vendor_info_t *ent;
360
361         INIT_DEBUGOUT("igb_probe: begin");
362
363         pci_vendor_id = pci_get_vendor(dev);
364         if (pci_vendor_id != IGB_VENDOR_ID)
365                 return (ENXIO);
366
367         pci_device_id = pci_get_device(dev);
368         pci_subvendor_id = pci_get_subvendor(dev);
369         pci_subdevice_id = pci_get_subdevice(dev);
370
371         ent = igb_vendor_info_array;
372         while (ent->vendor_id != 0) {
373                 if ((pci_vendor_id == ent->vendor_id) &&
374                     (pci_device_id == ent->device_id) &&
375
376                     ((pci_subvendor_id == ent->subvendor_id) ||
377                     (ent->subvendor_id == PCI_ANY_ID)) &&
378
379                     ((pci_subdevice_id == ent->subdevice_id) ||
380                     (ent->subdevice_id == PCI_ANY_ID))) {
381                         ksprintf(adapter_name, "%s %s",
382                                 igb_strings[ent->index],
383                                 igb_driver_version);
384                         device_set_desc_copy(dev, adapter_name);
385                         return (BUS_PROBE_DEFAULT);
386                 }
387                 ent++;
388         }
389
390         return (ENXIO);
391 }
392
393 /*********************************************************************
394  *  Device initialization routine
395  *
396  *  The attach entry point is called when the driver is being loaded.
397  *  This routine identifies the type of hardware, allocates all resources
398  *  and initializes the hardware.
399  *
400  *  return 0 on success, positive on failure
401  *********************************************************************/
402
403 static int
404 igb_attach(device_t dev)
405 {
406         struct adapter  *adapter;
407         int             error = 0;
408         u16             eeprom_data;
409
410         INIT_DEBUGOUT("igb_attach: begin");
411
412         adapter = device_get_softc(dev);
413         adapter->dev = adapter->osdep.dev = dev;
414         IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
415
416         /* SYSCTL stuff */
417         sysctl_ctx_init(&adapter->sysctl_ctx);
418         adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
419                                         SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
420                                         device_get_nameunit(adapter->dev),
421                                         CTLFLAG_RD, 0, "");
422         if (adapter->sysctl_tree == NULL) {
423                 device_printf(adapter->dev, "can't add sysctl node\n");
424                 error = ENOMEM;
425                 goto err_sysctl;
426         }
427
428         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
429             SYSCTL_CHILDREN(adapter->sysctl_tree),
430             OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
431             igb_sysctl_debug_info, "I", "Debug Information");
432
433         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
434             SYSCTL_CHILDREN(adapter->sysctl_tree),
435             OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
436             igb_sysctl_stats, "I", "Statistics");
437
438         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
439             SYSCTL_CHILDREN(adapter->sysctl_tree),
440             OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
441             &igb_fc_setting, 0, "Flow Control");
442
443         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
444             SYSCTL_CHILDREN(adapter->sysctl_tree),
445             OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
446             &igb_enable_aim, 1, "Interrupt Moderation");
447
448         callout_init_mp(&adapter->timer);
449
450         /* Determine hardware and mac info */
451         igb_identify_hardware(adapter);
452
453         /* Setup PCI resources */
454         if (igb_allocate_pci_resources(adapter)) {
455                 device_printf(dev, "Allocation of PCI resources failed\n");
456                 error = ENXIO;
457                 goto err_pci;
458         }
459
460         /* Do Shared Code initialization */
461         if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
462                 device_printf(dev, "Setup of Shared code failed\n");
463                 error = ENXIO;
464                 goto err_pci;
465         }
466
467         e1000_get_bus_info(&adapter->hw);
468
469         /* Sysctls for limiting the amount of work done in the taskqueue */
470         igb_add_rx_process_limit(adapter, "rx_processing_limit",
471             "max number of rx packets to process", &adapter->rx_process_limit,
472             igb_rx_process_limit);
473
474         /*
475          * Validate number of transmit and receive descriptors. It
476          * must not exceed hardware maximum, and must be multiple
477          * of E1000_DBA_ALIGN.
478          */
479         if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
480             (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
481                 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
482                     IGB_DEFAULT_TXD, igb_txd);
483                 adapter->num_tx_desc = IGB_DEFAULT_TXD;
484         } else
485                 adapter->num_tx_desc = igb_txd;
486         if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
487             (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
488                 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
489                     IGB_DEFAULT_RXD, igb_rxd);
490                 adapter->num_rx_desc = IGB_DEFAULT_RXD;
491         } else
492                 adapter->num_rx_desc = igb_rxd;
493
494         adapter->hw.mac.autoneg = DO_AUTO_NEG;
495         adapter->hw.phy.autoneg_wait_to_complete = FALSE;
496         adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
497
498         /* Copper options */
499         if (adapter->hw.phy.media_type == e1000_media_type_copper) {
500                 adapter->hw.phy.mdix = AUTO_ALL_MODES;
501                 adapter->hw.phy.disable_polarity_correction = FALSE;
502                 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
503         }
504
505         /*
506          * Set the frame limits assuming
507          * standard ethernet sized frames.
508          */
509         adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
510         adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
511
512         /*
513         ** Allocate and Setup Queues
514         */
515         if (igb_allocate_queues(adapter)) {
516                 error = ENOMEM;
517                 goto err_pci;
518         }
519
520         /*
521         ** Start from a known state, this is
522         ** important in reading the nvm and
523         ** mac from that.
524         */
525         e1000_reset_hw(&adapter->hw);
526
527         /* Make sure we have a good EEPROM before we read from it */
528         if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
529                 /*
530                 ** Some PCI-E parts fail the first check due to
531                 ** the link being in sleep state, call it again,
532                 ** if it fails a second time its a real issue.
533                 */
534                 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
535                         device_printf(dev,
536                             "The EEPROM Checksum Is Not Valid\n");
537                         error = EIO;
538                         goto err_late;
539                 }
540         }
541
542         /*
543         ** Copy the permanent MAC address out of the EEPROM
544         */
545         if (e1000_read_mac_addr(&adapter->hw) < 0) {
546                 device_printf(dev, "EEPROM read error while reading MAC"
547                     " address\n");
548                 error = EIO;
549                 goto err_late;
550         }
551         /* Check its sanity */
552         if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
553                 device_printf(dev, "Invalid MAC address\n");
554                 error = EIO;
555                 goto err_late;
556         }
557
558         /* 
559         ** Configure Interrupts
560         */
561         if ((adapter->msix > 1) && (igb_enable_msix))
562                 error = igb_allocate_msix(adapter);
563         else /* MSI or Legacy */
564                 error = igb_allocate_legacy(adapter);
565         if (error)
566                 goto err_late;
567
568         /* Setup OS specific network interface */
569         igb_setup_interface(dev, adapter);
570
571         /* Now get a good starting state */
572         igb_reset(adapter);
573
574         /* Initialize statistics */
575         igb_update_stats_counters(adapter);
576
577         adapter->hw.mac.get_link_status = 1;
578         igb_update_link_status(adapter);
579
580         /* Indicate SOL/IDER usage */
581         if (e1000_check_reset_block(&adapter->hw))
582                 device_printf(dev,
583                     "PHY reset is blocked due to SOL/IDER session.\n");
584
585         /* Determine if we have to control management hardware */
586         adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
587
588         /*
589          * Setup Wake-on-Lan
590          */
591         /* APME bit in EEPROM is mapped to WUC.APME */
592         eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
593         if (eeprom_data)
594                 adapter->wol = E1000_WUFC_MAG;
595
596         /* Register for VLAN events */
597         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
598              igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
599         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
600              igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
601
602         /* Tell the stack that the interface is not active */
603         adapter->ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
604
605         INIT_DEBUGOUT("igb_attach: end");
606
607         return (0);
608
609 err_late:
610         igb_free_transmit_structures(adapter);
611         igb_free_receive_structures(adapter);
612         igb_release_hw_control(adapter);
613 err_pci:
614         igb_free_pci_resources(adapter);
615 err_sysctl:
616         sysctl_ctx_free(&adapter->sysctl_ctx);
617         IGB_CORE_LOCK_DESTROY(adapter);
618
619         return (error);
620 }
621
622 /*********************************************************************
623  *  Device removal routine
624  *
625  *  The detach entry point is called when the driver is being removed.
626  *  This routine stops the adapter and deallocates all the resources
627  *  that were allocated for driver operation.
628  *
629  *  return 0 on success, positive on failure
630  *********************************************************************/
631
632 static int
633 igb_detach(device_t dev)
634 {
635         struct adapter  *adapter = device_get_softc(dev);
636
637         INIT_DEBUGOUT("igb_detach: begin");
638
639         /* Make sure VLANS are not using driver */
640         if (adapter->ifp->if_vlantrunks != NULL) {
641                 device_printf(dev,"Vlan in use, detach first\n");
642                 return (EBUSY);
643         }
644
645         IGB_CORE_LOCK(adapter);
646         adapter->in_detach = 1;
647         igb_stop(adapter);
648         IGB_CORE_UNLOCK(adapter);
649
650         e1000_phy_hw_reset(&adapter->hw);
651
652         /* Give control back to firmware */
653         igb_release_manageability(adapter);
654         igb_release_hw_control(adapter);
655
656         if (adapter->wol) {
657                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
658                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
659                 igb_enable_wakeup(dev);
660         }
661
662         /* Unregister VLAN events */
663         if (adapter->vlan_attach != NULL)
664                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
665         if (adapter->vlan_detach != NULL)
666                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
667
668         ether_ifdetach(adapter->ifp);
669
670         //callout_drain(&adapter->timer);
671         callout_stop(&adapter->timer);
672
673         igb_free_pci_resources(adapter);
674         bus_generic_detach(dev);
675
676         igb_free_transmit_structures(adapter);
677         igb_free_receive_structures(adapter);
678
679         sysctl_ctx_free(&adapter->sysctl_ctx);
680         IGB_CORE_LOCK_DESTROY(adapter);
681
682         return (0);
683 }
684
685 /*********************************************************************
686  *
687  *  Shutdown entry point
688  *
689  **********************************************************************/
690
691 static int
692 igb_shutdown(device_t dev)
693 {
694         return igb_suspend(dev);
695 }
696
697 /*
698  * Suspend/resume device methods.
699  */
700 static int
701 igb_suspend(device_t dev)
702 {
703         struct adapter *adapter = device_get_softc(dev);
704
705         IGB_CORE_LOCK(adapter);
706
707         igb_stop(adapter);
708
709         igb_release_manageability(adapter);
710         igb_release_hw_control(adapter);
711
712         if (adapter->wol) {
713                 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
714                 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
715                 igb_enable_wakeup(dev);
716         }
717
718         IGB_CORE_UNLOCK(adapter);
719
720         return bus_generic_suspend(dev);
721 }
722
723 static int
724 igb_resume(device_t dev)
725 {
726         struct adapter *adapter = device_get_softc(dev);
727         struct ifnet *ifp = adapter->ifp;
728
729         IGB_CORE_LOCK(adapter);
730         igb_init_locked(adapter);
731         igb_init_manageability(adapter);
732
733         if ((ifp->if_flags & IFF_UP) &&
734             (ifp->if_flags & IFF_RUNNING))
735                 igb_start(ifp);
736
737         IGB_CORE_UNLOCK(adapter);
738
739         return bus_generic_resume(dev);
740 }
741
742
743 /*********************************************************************
744  *  Transmit entry point
745  *
746  *  igb_start is called by the stack to initiate a transmit.
747  *  The driver will remain in this routine as long as there are
748  *  packets to transmit and transmit resources are available.
749  *  In case resources are not available stack is notified and
750  *  the packet is requeued.
751  **********************************************************************/
752
753 static void
754 igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
755 {
756         struct adapter  *adapter = ifp->if_softc;
757         struct mbuf     *m_head;
758
759         IGB_TX_LOCK_ASSERT(txr);
760
761         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
762                 return;
763
764         /*
765          * Must purge on abort from this point on or the netif will call
766          * us endlessly.  Either that or set IFF_OACTIVE.
767          */
768         if (!adapter->link_active) {
769                 ifq_purge(&ifp->if_snd);
770                 return;
771         }
772
773         while (!ifq_is_empty(&ifp->if_snd)) {
774
775                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
776                 if (m_head == NULL)
777                         break;
778                 /*
779                  *  Encapsulation can modify our pointer, and or make it
780                  *  NULL on failure.  In that event, we can't requeue.
781                  */
782                 if (igb_xmit(txr, &m_head)) {
783                         if (m_head == NULL)
784                                 break;
785                         ifp->if_flags |= IFF_OACTIVE;
786                         ifq_prepend(&ifp->if_snd, m_head);
787                         break;
788                 }
789
790                 /* Send a copy of the frame to the BPF listener */
791                 ETHER_BPF_MTAP(ifp, m_head);
792
793                 /* Set watchdog on */
794                 txr->watchdog_check = TRUE;
795         }
796 }
797  
798 /*
799  * Legacy TX driver routine, called from the
800  * stack, always uses tx[0], and spins for it.
801  * Should not be used with multiqueue tx
802  */
803 static void
804 igb_start(struct ifnet *ifp)
805 {
806         struct adapter  *adapter = ifp->if_softc;
807         struct tx_ring  *txr = adapter->tx_rings;
808
809         if (ifp->if_flags & IFF_RUNNING) {
810                 IGB_TX_LOCK(txr);
811                 igb_start_locked(txr, ifp);
812                 IGB_TX_UNLOCK(txr);
813         }
814         return;
815 }
816
817 #if __FreeBSD_version >= 800000
818 /*
819 ** Multiqueue Transmit driver
820 **
821 */
822 static int
823 igb_mq_start(struct ifnet *ifp, struct mbuf *m)
824 {
825         struct adapter  *adapter = ifp->if_softc;
826         struct tx_ring  *txr;
827         int             i = 0, err = 0;
828
829         /* Which queue to use */
830         if ((m->m_flags & M_FLOWID) != 0)
831                 i = m->m_pkthdr.flowid % adapter->num_queues;
832         txr = &adapter->tx_rings[i];
833
834         if (IGB_TX_TRYLOCK(txr)) {
835                 err = igb_mq_start_locked(ifp, txr, m);
836                 IGB_TX_UNLOCK(txr);
837         } else
838                 err = drbr_enqueue(ifp, txr->br, m);
839
840         return (err);
841 }
842
843 static int
844 igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
845 {
846         struct adapter  *adapter = txr->adapter;
847         struct mbuf     *next;
848         int             err = 0, enq;
849
850         IGB_TX_LOCK_ASSERT(txr);
851
852         if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) !=
853             IFF_RUNNING || adapter->link_active == 0) {
854                 if (m != NULL)
855                         err = drbr_enqueue(ifp, txr->br, m);
856                 return (err);
857         }
858
859         enq = 0;
860         if (m == NULL) {
861                 next = drbr_dequeue(ifp, txr->br);
862         } else if (drbr_needs_enqueue(ifp, txr->br)) {
863                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
864                         return (err);
865                 next = drbr_dequeue(ifp, txr->br);
866         } else
867                 next = m;
868         /* Process the queue */
869         while (next != NULL) {
870                 if ((err = igb_xmit(txr, &next)) != 0) {
871                         if (next != NULL)
872                                 err = drbr_enqueue(ifp, txr->br, next);
873                         break;
874                 }
875                 enq++;
876                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
877                 ETHER_BPF_MTAP(ifp, next);
878                 if ((ifp->if_flags & IFF_RUNNING) == 0)
879                         break;
880                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
881                         ifp->if_flags |= IFF_OACTIVE;
882                         break;
883                 }
884                 next = drbr_dequeue(ifp, txr->br);
885         }
886         if (enq > 0) {
887                 /* Set the watchdog */
888                 txr->watchdog_check = TRUE;
889         }
890         return (err);
891 }
892
893 /*
894 ** Flush all ring buffers
895 */
896 static void
897 igb_qflush(struct ifnet *ifp)
898 {
899         struct adapter  *adapter = ifp->if_softc;
900         struct tx_ring  *txr = adapter->tx_rings;
901         struct mbuf     *m;
902
903         for (int i = 0; i < adapter->num_queues; i++, txr++) {
904                 IGB_TX_LOCK(txr);
905                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
906                         m_freem(m);
907                 IGB_TX_UNLOCK(txr);
908         }
909         if_qflush(ifp);
910 }
911 #endif /* __FreeBSD_version >= 800000 */
912
913 /*********************************************************************
914  *  Ioctl entry point
915  *
916  *  igb_ioctl is called when the user wants to configure the
917  *  interface.
918  *
919  *  return 0 on success, positive on failure
920  **********************************************************************/
921
922 static int
923 igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cred)
924 {
925         struct adapter  *adapter = ifp->if_softc;
926         struct ifreq *ifr = (struct ifreq *)data;
927 #ifdef INET
928         struct ifaddr *ifa = (struct ifaddr *)data;
929 #endif
930         int error = 0;
931
932         if (adapter->in_detach)
933                 return (error);
934
935         switch (command) {
936         case SIOCSIFADDR:
937 #ifdef INET
938                 if (ifa->ifa_addr->sa_family == AF_INET) {
939                         /*
940                          * XXX
941                          * Since resetting hardware takes a very long time
942                          * and results in link renegotiation we only
943                          * initialize the hardware only when it is absolutely
944                          * required.
945                          */
946                         ifp->if_flags |= IFF_UP;
947                         if (!(ifp->if_flags & IFF_RUNNING)) {
948                                 IGB_CORE_LOCK(adapter);
949                                 igb_init_locked(adapter);
950                                 IGB_CORE_UNLOCK(adapter);
951                         }
952                         if (!(ifp->if_flags & IFF_NOARP))
953                                 arp_ifinit(ifp, ifa);
954                 } else
955 #endif
956                         error = ether_ioctl(ifp, command, data);
957                 break;
958         case SIOCSIFMTU:
959             {
960                 int max_frame_size;
961
962                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
963
964                 IGB_CORE_LOCK(adapter);
965                 max_frame_size = 9234;
966                 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
967                     ETHER_CRC_LEN) {
968                         IGB_CORE_UNLOCK(adapter);
969                         error = EINVAL;
970                         break;
971                 }
972
973                 ifp->if_mtu = ifr->ifr_mtu;
974                 adapter->max_frame_size =
975                     ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
976                 igb_init_locked(adapter);
977                 IGB_CORE_UNLOCK(adapter);
978                 break;
979             }
980         case SIOCSIFFLAGS:
981                 IOCTL_DEBUGOUT("ioctl rcv'd:\
982                     SIOCSIFFLAGS (Set Interface Flags)");
983                 IGB_CORE_LOCK(adapter);
984                 if (ifp->if_flags & IFF_UP) {
985                         if ((ifp->if_flags & IFF_RUNNING)) {
986                                 if ((ifp->if_flags ^ adapter->if_flags) &
987                                     (IFF_PROMISC | IFF_ALLMULTI)) {
988                                         igb_disable_promisc(adapter);
989                                         igb_set_promisc(adapter);
990                                 }
991                         } else
992                                 igb_init_locked(adapter);
993                 } else
994                         if (ifp->if_flags & IFF_RUNNING)
995                                 igb_stop(adapter); 
996                 adapter->if_flags = ifp->if_flags;
997                 IGB_CORE_UNLOCK(adapter);
998                 break;
999         case SIOCADDMULTI:
1000         case SIOCDELMULTI:
1001                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1002                 if (ifp->if_flags & IFF_RUNNING) {
1003                         IGB_CORE_LOCK(adapter);
1004                         igb_disable_intr(adapter);
1005                         igb_set_multi(adapter);
1006 #ifdef DEVICE_POLLING
1007                         if ((ifp->if_flags & IFF_POLLING) == 0)
1008 #endif
1009                                 igb_enable_intr(adapter);
1010                         IGB_CORE_UNLOCK(adapter);
1011                 }
1012                 break;
1013         case SIOCSIFMEDIA:
1014                 /* Check SOL/IDER usage */
1015                 IGB_CORE_LOCK(adapter);
1016                 if (e1000_check_reset_block(&adapter->hw)) {
1017                         IGB_CORE_UNLOCK(adapter);
1018                         device_printf(adapter->dev, "Media change is"
1019                             " blocked due to SOL/IDER session.\n");
1020                         break;
1021                 }
1022                 IGB_CORE_UNLOCK(adapter);
1023         case SIOCGIFMEDIA:
1024                 IOCTL_DEBUGOUT("ioctl rcv'd: \
1025                     SIOCxIFMEDIA (Get/Set Interface Media)");
1026                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1027                 break;
1028         case SIOCSIFCAP:
1029             {
1030                 int mask, reinit;
1031
1032                 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1033                 reinit = 0;
1034                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1035 #ifdef DEVICE_POLLING
1036                 if (ifp->if_flags & IFF_POLLING) {
1037                         IGB_CORE_LOCK(adapter);
1038                         igb_disable_intr(adapter);
1039                         IGB_CORE_UNLOCK(adapter);
1040                 }
1041 #endif
1042                 if (mask & IFCAP_HWCSUM) {
1043                         ifp->if_capenable ^= IFCAP_HWCSUM;
1044                         reinit = 1;
1045                 }
1046 #ifdef NET_TSO 
1047                 if (mask & IFCAP_TSO4) {
1048                         ifp->if_capenable ^= IFCAP_TSO4;
1049                         reinit = 1;
1050                 }
1051 #endif
1052                 if (mask & IFCAP_VLAN_HWTAGGING) {
1053                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1054                         reinit = 1;
1055                 }
1056 #ifdef NET_LRO 
1057                 if (mask & IFCAP_LRO) {
1058                         ifp->if_capenable ^= IFCAP_LRO;
1059                         reinit = 1;
1060                 }
1061 #endif
1062                 if (reinit && (ifp->if_flags & IFF_RUNNING))
1063                         igb_init(adapter);
1064 #if 0
1065                 VLAN_CAPABILITIES(ifp);
1066 #endif
1067                 break;
1068             }
1069
1070         default:
1071                 error = ether_ioctl(ifp, command, data);
1072                 break;
1073         }
1074         IOCTL_DEBUGOUT("ioctl done");
1075
1076         return (error);
1077 }
1078
1079
1080 /*********************************************************************
1081  *  Init entry point
1082  *
1083  *  This routine is used in two ways. It is used by the stack as
1084  *  init entry point in network interface structure. It is also used
1085  *  by the driver as a hw/sw initialization routine to get to a
1086  *  consistent state.
1087  *
 *  Neither igb_init() nor igb_init_locked() returns a value.
1089  **********************************************************************/
1090
1091 static void
1092 igb_init_locked(struct adapter *adapter)
1093 {
1094         struct ifnet    *ifp = adapter->ifp;
1095         device_t        dev = adapter->dev;
1096
1097         INIT_DEBUGOUT("igb_init: begin");
1098
1099         IGB_CORE_LOCK_ASSERT(adapter);
1100
1101         igb_disable_intr(adapter);
1102         callout_stop(&adapter->timer);
1103
1104         /* Get the latest mac address, User can use a LAA */
1105         bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1106               ETHER_ADDR_LEN);
1107
1108         /* Put the address into the Receive Address Array */
1109         e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1110
1111         igb_reset(adapter);
1112         igb_update_link_status(adapter);
1113
1114         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1115
1116         /* Set hardware offload abilities */
1117         ifp->if_hwassist = 0;
1118         if (ifp->if_capenable & IFCAP_TXCSUM) {
1119                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1120 #if __FreeBSD_version >= 800000
1121                 if (adapter->hw.mac.type == e1000_82576)
1122                         ifp->if_hwassist |= CSUM_SCTP;
1123 #endif
1124         }
1125
1126 #ifdef NET_TSO
1127         if (ifp->if_capenable & IFCAP_TSO4)
1128                 ifp->if_hwassist |= CSUM_TSO;
1129 #endif
1130
1131         /* Configure for OS presence */
1132         igb_init_manageability(adapter);
1133
1134         /* Prepare transmit descriptors and buffers */
1135         igb_setup_transmit_structures(adapter);
1136         igb_initialize_transmit_units(adapter);
1137
1138         /* Setup Multicast table */
1139         igb_set_multi(adapter);
1140
1141         /*
1142         ** Figure out the desired mbuf pool
1143         ** for doing jumbo/packetsplit
1144         */
1145         if (ifp->if_mtu > ETHERMTU)
1146                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1147         else
1148                 adapter->rx_mbuf_sz = MCLBYTES;
1149
1150         /* Prepare receive descriptors and buffers */
1151         if (igb_setup_receive_structures(adapter)) {
1152                 device_printf(dev, "Could not setup receive structures\n");
1153                 return;
1154         }
1155         igb_initialize_receive_units(adapter);
1156
1157         /* Don't lose promiscuous settings */
1158         igb_set_promisc(adapter);
1159
1160         ifp->if_flags |= IFF_RUNNING;
1161         ifp->if_flags &= ~IFF_OACTIVE;
1162
1163         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1164         e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1165
1166         if (adapter->msix > 1) /* Set up queue routing */
1167                 igb_configure_queues(adapter);
1168
1169         /* Set up VLAN tag offload and filter */
1170         igb_setup_vlan_hw_support(adapter);
1171
1172         /* this clears any pending interrupts */
1173         E1000_READ_REG(&adapter->hw, E1000_ICR);
1174 #ifdef DEVICE_POLLING
1175         /*
1176          * Only enable interrupts if we are not polling, make sure
1177          * they are off otherwise.
1178          */
1179         if (ifp->if_flags & IFF_POLLING)
1180                 igb_disable_intr(adapter);
1181         else
1182 #endif /* DEVICE_POLLING */
1183         {
1184         igb_enable_intr(adapter);
1185         E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1186         }
1187
1188         /* Don't reset the phy next time init gets called */
1189         adapter->hw.phy.reset_disable = TRUE;
1190         INIT_DEBUGOUT("igb_init: end");
1191 }
1192
/*
 * Locking wrapper around igb_init_locked(): arg is the struct adapter,
 * and the core lock is held for the duration of the call.
 */
static void
igb_init(void *arg)
{
        struct adapter  *sc = (struct adapter *)arg;

        IGB_CORE_LOCK(sc);
        igb_init_locked(sc);
        IGB_CORE_UNLOCK(sc);
}
1202
1203
1204 static void
1205 igb_handle_rxtx(void *context, int pending)
1206 {
1207         struct adapter  *adapter = context;
1208         struct tx_ring  *txr = adapter->tx_rings;
1209         struct rx_ring  *rxr = adapter->rx_rings;
1210         struct ifnet    *ifp;
1211
1212         ifp = adapter->ifp;
1213
1214         if (ifp->if_flags & IFF_RUNNING) {
1215                 if (igb_rxeof(rxr, adapter->rx_process_limit))
1216                         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1217                 IGB_TX_LOCK(txr);
1218                 igb_txeof(txr);
1219
1220 #if __FreeBSD_version >= 800000
1221                 if (!drbr_empty(ifp, txr->br))
1222                         igb_mq_start_locked(ifp, txr, NULL);
1223 #else
1224                 if (!ifq_is_empty(&ifp->if_snd))
1225                         igb_start_locked(txr, ifp);
1226 #endif
1227                 IGB_TX_UNLOCK(txr);
1228         }
1229
1230         igb_enable_intr(adapter);
1231 }
1232
1233 static void
1234 igb_handle_que(void *context, int pending)
1235 {
1236         struct igb_queue *que = context;
1237         struct adapter *adapter = que->adapter;
1238         struct tx_ring *txr = que->txr;
1239         struct rx_ring *rxr = que->rxr;
1240         struct ifnet    *ifp = adapter->ifp;
1241         u32             loop = IGB_MAX_LOOP;
1242         bool            more;
1243
1244         /* RX first */
1245         do {
1246                 more = igb_rxeof(rxr, -1);
1247         } while (loop-- && more);
1248
1249         if (IGB_TX_TRYLOCK(txr)) {
1250                 loop = IGB_MAX_LOOP;
1251                 do {
1252                         more = igb_txeof(txr);
1253                 } while (loop-- && more);
1254 #if __FreeBSD_version >= 800000
1255                 igb_mq_start_locked(ifp, txr, NULL);
1256 #else
1257                 if (!ifq_is_empty(&ifp->if_snd))
1258                         igb_start_locked(txr, ifp);
1259 #endif
1260                 IGB_TX_UNLOCK(txr);
1261         }
1262
1263         /* Reenable this interrupt */
1264 #ifdef DEVICE_POLLING
1265         if ((ifp->if_flags & IFF_POLLING) == 0)
1266 #endif
1267                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1268 }
1269
1270 /* Deal with link in a sleepable context */
1271 static void
1272 igb_handle_link(void *context, int pending)
1273 {
1274         struct adapter *adapter = context;
1275
1276         adapter->hw.mac.get_link_status = 1;
1277         igb_update_link_status(adapter);
1278 }
1279
1280 /*********************************************************************
1281  *
1282  *  MSI/Legacy Deferred
1283  *  Interrupt Service routine  
1284  *
1285  *********************************************************************/
1286 #define FILTER_STRAY
1287 #define FILTER_HANDLED
1288 static void
1289 igb_irq_fast(void *arg)
1290 {
1291         struct adapter  *adapter = arg;
1292         uint32_t        reg_icr;
1293
1294
1295         reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1296
1297         /* Hot eject?  */
1298         if (reg_icr == 0xffffffff)
1299                 return FILTER_STRAY; 
1300
1301         /* Definitely not our interrupt.  */
1302         if (reg_icr == 0x0)
1303                 return FILTER_STRAY;
1304
1305         if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1306                 return FILTER_STRAY;
1307
1308         /*
1309          * Mask interrupts until the taskqueue is finished running.  This is
1310          * cheap, just assume that it is needed.  This also works around the
1311          * MSI message reordering errata on certain systems.
1312          */
1313         igb_disable_intr(adapter);
1314         taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1315
1316         /* Link status change */
1317         if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1318                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1319
1320         if (reg_icr & E1000_ICR_RXO)
1321                 adapter->rx_overruns++;
1322         return FILTER_HANDLED;
1323 }
1324
1325 #ifdef DEVICE_POLLING
1326 /*********************************************************************
1327  *
1328  *  Legacy polling routine  
1329  *
1330  *********************************************************************/
1331 #if __FreeBSD_version >= 800000
1332 #define POLL_RETURN_COUNT(a) (a)
1333 static int
1334 #else
1335 #define POLL_RETURN_COUNT(a)
1336 static void
1337 #endif
1338 igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1339 {
1340         struct adapter *adapter = ifp->if_softc;
1341         struct rx_ring  *rxr = adapter->rx_rings;
1342         struct tx_ring  *txr = adapter->tx_rings;
1343         u32             reg_icr, rx_done = 0;
1344         u32             loop = IGB_MAX_LOOP;
1345         bool            more;
1346
1347         IGB_CORE_LOCK(adapter);
1348         if ((ifp->if_flags & IFF_RUNNING) == 0) {
1349                 IGB_CORE_UNLOCK(adapter);
1350                 return POLL_RETURN_COUNT(rx_done);
1351         }
1352
1353         if (cmd == POLL_AND_CHECK_STATUS) {
1354                 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1355                 /* Link status change */
1356                 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1357                         taskqueue_enqueue(adapter->tq, &adapter->link_task);
1358
1359                 if (reg_icr & E1000_ICR_RXO)
1360                         adapter->rx_overruns++;
1361         }
1362         IGB_CORE_UNLOCK(adapter);
1363
1364         /* TODO: rx_count */
1365         rx_done = igb_rxeof(rxr, count) ? 1 : 0;
1366
1367         IGB_TX_LOCK(txr);
1368         do {
1369                 more = igb_txeof(txr);
1370         } while (loop-- && more);
1371 #if __FreeBSD_version >= 800000
1372         if (!drbr_empty(ifp, txr->br))
1373                 igb_mq_start_locked(ifp, txr, NULL);
1374 #else
1375         if (!ifq_is_empty(&ifp->if_snd))
1376                 igb_start_locked(txr, ifp);
1377 #endif
1378         IGB_TX_UNLOCK(txr);
1379         return POLL_RETURN_COUNT(rx_done);
1380 }
1381 #endif /* DEVICE_POLLING */
1382
1383 /*********************************************************************
1384  *
 *  MSIX Queue (TX and RX) Interrupt Service routine
1386  *
1387  **********************************************************************/
1388 static void
1389 igb_msix_que(void *arg)
1390 {
1391         struct igb_queue *que = arg;
1392         struct adapter *adapter = que->adapter;
1393         struct tx_ring *txr = que->txr;
1394         struct rx_ring *rxr = que->rxr;
1395         u32             newitr = 0;
1396         bool            more_tx, more_rx;
1397
1398         E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1399         ++que->irqs;
1400
1401         IGB_TX_LOCK(txr);
1402         more_tx = igb_txeof(txr);
1403         IGB_TX_UNLOCK(txr);
1404
1405         more_rx = igb_rxeof(rxr, adapter->rx_process_limit);
1406
1407         if (igb_enable_aim == FALSE)
1408                 goto no_calc;
1409         /*
1410         ** Do Adaptive Interrupt Moderation:
1411         **  - Write out last calculated setting
1412         **  - Calculate based on average size over
1413         **    the last interval.
1414         */
1415         if (que->eitr_setting)
1416                 E1000_WRITE_REG(&adapter->hw,
1417                     E1000_EITR(que->msix), que->eitr_setting);
1418  
1419         que->eitr_setting = 0;
1420
1421         /* Idle, do nothing */
1422         if ((txr->bytes == 0) && (rxr->bytes == 0))
1423                 goto no_calc;
1424                                 
1425         /* Used half Default if sub-gig */
1426         if (adapter->link_speed != 1000)
1427                 newitr = IGB_DEFAULT_ITR / 2;
1428         else {
1429                 if ((txr->bytes) && (txr->packets))
1430                         newitr = txr->bytes/txr->packets;
1431                 if ((rxr->bytes) && (rxr->packets))
1432                         newitr = max(newitr,
1433                             (rxr->bytes / rxr->packets));
1434                 newitr += 24; /* account for hardware frame, crc */
1435                 /* set an upper boundary */
1436                 newitr = min(newitr, 3000);
1437                 /* Be nice to the mid range */
1438                 if ((newitr > 300) && (newitr < 1200))
1439                         newitr = (newitr / 3);
1440                 else
1441                         newitr = (newitr / 2);
1442         }
1443         newitr &= 0x7FFC;  /* Mask invalid bits */
1444         if (adapter->hw.mac.type == e1000_82575)
1445                 newitr |= newitr << 16;
1446         else
1447                 newitr |= 0x8000000;
1448                  
1449         /* save for next interrupt */
1450         que->eitr_setting = newitr;
1451
1452         /* Reset state */
1453         txr->bytes = 0;
1454         txr->packets = 0;
1455         rxr->bytes = 0;
1456         rxr->packets = 0;
1457
1458 no_calc:
1459         /* Schedule a clean task if needed*/
1460         if (more_tx || more_rx) 
1461                 taskqueue_enqueue(que->tq, &que->que_task);
1462         else
1463                 /* Reenable this interrupt */
1464                 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1465         return;
1466 }
1467
1468
1469 /*********************************************************************
1470  *
1471  *  MSIX Link Interrupt Service routine
1472  *
1473  **********************************************************************/
1474
1475 static void
1476 igb_msix_link(void *arg)
1477 {
1478         struct adapter  *adapter = arg;
1479         u32             icr;
1480
1481         ++adapter->link_irq;
1482         icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1483         if (!(icr & E1000_ICR_LSC))
1484                 goto spurious;
1485         taskqueue_enqueue(adapter->tq, &adapter->link_task);
1486
1487 spurious:
1488         /* Rearm */
1489         E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1490         E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1491         return;
1492 }
1493
1494
1495 /*********************************************************************
1496  *
1497  *  Media Ioctl callback
1498  *
1499  *  This routine is called whenever the user queries the status of
1500  *  the interface using ifconfig.
1501  *
1502  **********************************************************************/
1503 static void
1504 igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1505 {
1506         struct adapter *adapter = ifp->if_softc;
1507         u_char fiber_type = IFM_1000_SX;
1508
1509         INIT_DEBUGOUT("igb_media_status: begin");
1510
1511         IGB_CORE_LOCK(adapter);
1512         igb_update_link_status(adapter);
1513
1514         ifmr->ifm_status = IFM_AVALID;
1515         ifmr->ifm_active = IFM_ETHER;
1516
1517         if (!adapter->link_active) {
1518                 IGB_CORE_UNLOCK(adapter);
1519                 return;
1520         }
1521
1522         ifmr->ifm_status |= IFM_ACTIVE;
1523
1524         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1525             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1526                 ifmr->ifm_active |= fiber_type | IFM_FDX;
1527         else {
1528                 switch (adapter->link_speed) {
1529                 case 10:
1530                         ifmr->ifm_active |= IFM_10_T;
1531                         break;
1532                 case 100:
1533                         ifmr->ifm_active |= IFM_100_TX;
1534                         break;
1535                 case 1000:
1536                         ifmr->ifm_active |= IFM_1000_T;
1537                         break;
1538                 }
1539                 if (adapter->link_duplex == FULL_DUPLEX)
1540                         ifmr->ifm_active |= IFM_FDX;
1541                 else
1542                         ifmr->ifm_active |= IFM_HDX;
1543         }
1544         IGB_CORE_UNLOCK(adapter);
1545 }
1546
1547 /*********************************************************************
1548  *
1549  *  Media Ioctl callback
1550  *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options of ifconfig.
1553  *
1554  **********************************************************************/
1555 static int
1556 igb_media_change(struct ifnet *ifp)
1557 {
1558         struct adapter *adapter = ifp->if_softc;
1559         struct ifmedia  *ifm = &adapter->media;
1560
1561         INIT_DEBUGOUT("igb_media_change: begin");
1562
1563         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1564                 return (EINVAL);
1565
1566         IGB_CORE_LOCK(adapter);
1567         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1568         case IFM_AUTO:
1569                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1570                 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1571                 break;
1572         case IFM_1000_LX:
1573         case IFM_1000_SX:
1574         case IFM_1000_T:
1575                 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1576                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1577                 break;
1578         case IFM_100_TX:
1579                 adapter->hw.mac.autoneg = FALSE;
1580                 adapter->hw.phy.autoneg_advertised = 0;
1581                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1582                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1583                 else
1584                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1585                 break;
1586         case IFM_10_T:
1587                 adapter->hw.mac.autoneg = FALSE;
1588                 adapter->hw.phy.autoneg_advertised = 0;
1589                 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1590                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1591                 else
1592                         adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1593                 break;
1594         default:
1595                 device_printf(adapter->dev, "Unsupported media type\n");
1596         }
1597
1598         /* As the speed/duplex settings my have changed we need to
1599          * reset the PHY.
1600          */
1601         adapter->hw.phy.reset_disable = FALSE;
1602
1603         igb_init_locked(adapter);
1604         IGB_CORE_UNLOCK(adapter);
1605
1606         return (0);
1607 }
1608
1609
1610 /*********************************************************************
1611  *
 *  This routine maps the mbufs to Advanced TX descriptors,
 *  as used by the 82575 adapter.
1614  *  
1615  **********************************************************************/
1616
1617 static int
1618 igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1619 {
1620         struct adapter          *adapter = txr->adapter;
1621         bus_dma_segment_t       segs[IGB_MAX_SCATTER];
1622         bus_dmamap_t            map;
1623         struct igb_tx_buffer    *tx_buffer, *tx_buffer_mapped;
1624         union e1000_adv_tx_desc *txd = NULL;
1625         struct mbuf             *m_head;
1626         u32                     olinfo_status = 0, cmd_type_len = 0;
1627         int                     nsegs, i, j, error, first, last = 0;
1628         u32                     hdrlen = 0;
1629
1630         m_head = *m_headp;
1631
1632
1633         /* Set basic descriptor constants */
1634         cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1635         cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1636         if (m_head->m_flags & M_VLANTAG)
1637                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1638
1639         /*
1640          * Force a cleanup if number of TX descriptors
1641          * available hits the threshold
1642          */
1643         if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1644                 igb_txeof(txr);
1645                 /* Now do we at least have a minimal? */
1646                 if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1647                         txr->no_desc_avail++;
1648                         return (ENOBUFS);
1649                 }
1650         }
1651
1652         /*
1653          * Map the packet for DMA.
1654          *
1655          * Capture the first descriptor index,
1656          * this descriptor will have the index
1657          * of the EOP which is the only one that
1658          * now gets a DONE bit writeback.
1659          */
1660         first = txr->next_avail_desc;
1661         tx_buffer = &txr->tx_buffers[first];
1662         tx_buffer_mapped = tx_buffer;
1663         map = tx_buffer->map;
1664
1665         error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1666             *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1667
1668         if (error == EFBIG) {
1669                 struct mbuf *m;
1670
1671                 m = m_defrag(*m_headp, MB_DONTWAIT);
1672                 if (m == NULL) {
1673                         adapter->mbuf_defrag_failed++;
1674                         m_freem(*m_headp);
1675                         *m_headp = NULL;
1676                         return (ENOBUFS);
1677                 }
1678                 *m_headp = m;
1679
1680                 /* Try it again */
1681                 error = bus_dmamap_load_mbuf_segment(txr->txtag, map,
1682                     *m_headp, segs, IGB_MAX_SCATTER, &nsegs, BUS_DMA_NOWAIT);
1683
1684                 if (error == ENOMEM) {
1685                         adapter->no_tx_dma_setup++;
1686                         return (error);
1687                 } else if (error != 0) {
1688                         adapter->no_tx_dma_setup++;
1689                         m_freem(*m_headp);
1690                         *m_headp = NULL;
1691                         return (error);
1692                 }
1693         } else if (error == ENOMEM) {
1694                 adapter->no_tx_dma_setup++;
1695                 return (error);
1696         } else if (error != 0) {
1697                 adapter->no_tx_dma_setup++;
1698                 m_freem(*m_headp);
1699                 *m_headp = NULL;
1700                 return (error);
1701         }
1702
1703         /* Check again to be sure we have enough descriptors */
1704         if (nsegs > (txr->tx_avail - 2)) {
1705                 txr->no_desc_avail++;
1706                 bus_dmamap_unload(txr->txtag, map);
1707                 return (ENOBUFS);
1708         }
1709         m_head = *m_headp;
1710
1711         /*
1712          * Set up the context descriptor:
1713          * used when any hardware offload is done.
1714          * This includes CSUM, VLAN, and TSO. It
1715          * will use the first descriptor.
1716          */
1717 #ifdef NET_TSO
1718         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1719                 if (igb_tso_setup(txr, m_head, &hdrlen)) {
1720                         cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1721                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1722                         olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1723                 } else
1724                         return (ENXIO); 
1725         } else
1726 #endif
1727                if (igb_tx_ctx_setup(txr, m_head))
1728                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1729
1730         /* Calculate payload length */
1731         olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1732             << E1000_ADVTXD_PAYLEN_SHIFT);
1733
1734         /* 82575 needs the queue index added */
1735         if (adapter->hw.mac.type == e1000_82575)
1736                 olinfo_status |= txr->me << 4;
1737
1738         /* Set up our transmit descriptors */
1739         i = txr->next_avail_desc;
1740         for (j = 0; j < nsegs; j++) {
1741                 bus_size_t seg_len;
1742                 bus_addr_t seg_addr;
1743
1744                 tx_buffer = &txr->tx_buffers[i];
1745                 txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1746                 seg_addr = segs[j].ds_addr;
1747                 seg_len  = segs[j].ds_len;
1748
1749                 txd->read.buffer_addr = htole64(seg_addr);
1750                 txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1751                 txd->read.olinfo_status = htole32(olinfo_status);
1752                 last = i;
1753                 if (++i == adapter->num_tx_desc)
1754                         i = 0;
1755                 tx_buffer->m_head = NULL;
1756                 tx_buffer->next_eop = -1;
1757         }
1758
1759         txr->next_avail_desc = i;
1760         txr->tx_avail -= nsegs;
1761
1762         tx_buffer->m_head = m_head;
1763         tx_buffer_mapped->map = tx_buffer->map;
1764         tx_buffer->map = map;
1765         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1766
1767         /*
1768          * Last Descriptor of Packet
1769          * needs End Of Packet (EOP)
1770          * and Report Status (RS)
1771          */
1772         txd->read.cmd_type_len |=
1773             htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1774         /*
1775          * Keep track in the first buffer which
1776          * descriptor will be written back
1777          */
1778         tx_buffer = &txr->tx_buffers[first];
1779         tx_buffer->next_eop = last;
1780         txr->watchdog_time = ticks;
1781
1782         /*
1783          * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1784          * that this frame is available to transmit.
1785          */
1786         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1787             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1788         E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1789         ++txr->tx_packets;
1790
1791         return (0);
1792
1793 }
1794
1795 static void
1796 igb_set_promisc(struct adapter *adapter)
1797 {
1798         struct ifnet    *ifp = adapter->ifp;
1799         uint32_t        reg_rctl;
1800
1801         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1802
1803         if (ifp->if_flags & IFF_PROMISC) {
1804                 reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1805                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1806         } else if (ifp->if_flags & IFF_ALLMULTI) {
1807                 reg_rctl |= E1000_RCTL_MPE;
1808                 reg_rctl &= ~E1000_RCTL_UPE;
1809                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1810         }
1811 }
1812
1813 static void
1814 igb_disable_promisc(struct adapter *adapter)
1815 {
1816         uint32_t        reg_rctl;
1817
1818         reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1819
1820         reg_rctl &=  (~E1000_RCTL_UPE);
1821         reg_rctl &=  (~E1000_RCTL_MPE);
1822         E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1823 }
1824
1825
1826 /*********************************************************************
1827  *  Multicast Update
1828  *
1829  *  This routine is called whenever multicast address list is updated.
1830  *
1831  **********************************************************************/
1832
1833 static void
1834 igb_set_multi(struct adapter *adapter)
1835 {
1836         struct ifnet    *ifp = adapter->ifp;
1837         struct ifmultiaddr *ifma;
1838         u32 reg_rctl = 0;
1839         static u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1840
1841         int mcnt = 0;
1842
1843         IOCTL_DEBUGOUT("igb_set_multi: begin");
1844
1845 #if 0
1846 #if __FreeBSD_version < 800000
1847         IF_ADDR_LOCK(ifp);
1848 #else
1849         if_maddr_rlock(ifp);
1850 #endif
1851 #endif
1852
1853         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1854                 if (ifma->ifma_addr->sa_family != AF_LINK)
1855                         continue;
1856
1857                 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1858                         break;
1859
1860                 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1861                     &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1862                 mcnt++;
1863         }
1864 #if 0
1865 #if __FreeBSD_version < 800000
1866         IF_ADDR_UNLOCK(ifp);
1867 #else
1868         if_maddr_runlock(ifp);
1869 #endif
1870 #endif
1871
1872         if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1873                 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1874                 reg_rctl |= E1000_RCTL_MPE;
1875                 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1876         } else {
1877                 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1878         }
1879 }
1880
1881
1882 /*********************************************************************
1883  *  Timer routine:
1884  *      This routine checks for link status,
1885  *      updates statistics, and does the watchdog.
1886  *
1887  **********************************************************************/
1888
1889 static void
1890 igb_local_timer(void *arg)
1891 {
1892         struct adapter          *adapter = arg;
1893
1894         IGB_CORE_LOCK(adapter);
1895
1896         struct ifnet            *ifp = adapter->ifp;
1897         device_t                dev = adapter->dev;
1898         struct tx_ring          *txr = adapter->tx_rings;
1899
1900
1901         IGB_CORE_LOCK_ASSERT(adapter);
1902
1903         igb_update_link_status(adapter);
1904         igb_update_stats_counters(adapter);
1905
1906         if (igb_display_debug_stats && ifp->if_flags & IFF_RUNNING)
1907                 igb_print_hw_stats(adapter);
1908
1909         /*
1910         ** Watchdog: check for time since any descriptor was cleaned
1911         */
1912         for (int i = 0; i < adapter->num_queues; i++, txr++) {
1913                 if (txr->watchdog_check == FALSE)
1914                         continue;
1915                 if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1916                         goto timeout;
1917         }
1918
1919         /* Trigger an RX interrupt on all queues */
1920 #ifdef DEVICE_POLLING
1921         if ((ifp->if_flags & IFF_POLLING) == 0)
1922 #endif
1923                 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1924         callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1925         IGB_CORE_UNLOCK(adapter);
1926         return;
1927
1928 timeout:
1929         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1930         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1931             E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1932             E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1933         device_printf(dev,"TX(%d) desc avail = %d,"
1934             "Next TX to Clean = %d\n",
1935             txr->me, txr->tx_avail, txr->next_to_clean);
1936         adapter->ifp->if_flags &= ~IFF_RUNNING;
1937         adapter->watchdog_events++;
1938         igb_init_locked(adapter);
1939         IGB_CORE_UNLOCK(adapter);
1940 }
1941
1942 static void
1943 igb_update_link_status(struct adapter *adapter)
1944 {
1945         struct e1000_hw *hw = &adapter->hw;
1946         struct ifnet *ifp = adapter->ifp;
1947         device_t dev = adapter->dev;
1948         struct tx_ring *txr = adapter->tx_rings;
1949         u32 link_check = 0;
1950
1951         /* Get the cached link value or read for real */
1952         switch (hw->phy.media_type) {
1953         case e1000_media_type_copper:
1954                 if (hw->mac.get_link_status) {
1955                         /* Do the work to read phy */
1956                         e1000_check_for_link(hw);
1957                         link_check = !hw->mac.get_link_status;
1958                 } else
1959                         link_check = TRUE;
1960                 break;
1961         case e1000_media_type_fiber:
1962                 e1000_check_for_link(hw);
1963                 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1964                                  E1000_STATUS_LU);
1965                 break;
1966         case e1000_media_type_internal_serdes:
1967                 e1000_check_for_link(hw);
1968                 link_check = adapter->hw.mac.serdes_has_link;
1969                 break;
1970         default:
1971         case e1000_media_type_unknown:
1972                 break;
1973         }
1974
1975         /* Now we check if a transition has happened */
1976         if (link_check && (adapter->link_active == 0)) {
1977                 e1000_get_speed_and_duplex(&adapter->hw, 
1978                     &adapter->link_speed, &adapter->link_duplex);
1979                 if (bootverbose)
1980                         device_printf(dev, "Link is up %d Mbps %s\n",
1981                             adapter->link_speed,
1982                             ((adapter->link_duplex == FULL_DUPLEX) ?
1983                             "Full Duplex" : "Half Duplex"));
1984                 adapter->link_active = 1;
1985                 ifp->if_baudrate = adapter->link_speed * 1000000;
1986                 ifp->if_link_state = LINK_STATE_UP;
1987                 if_link_state_change(ifp);
1988         } else if (!link_check && (adapter->link_active == 1)) {
1989                 ifp->if_baudrate = adapter->link_speed = 0;
1990                 adapter->link_duplex = 0;
1991                 if (bootverbose)
1992                         device_printf(dev, "Link is Down\n");
1993                 adapter->link_active = 0;
1994                 ifp->if_link_state = LINK_STATE_DOWN;
1995                 if_link_state_change(ifp);
1996                 /* Turn off watchdogs */
1997                 for (int i = 0; i < adapter->num_queues; i++, txr++)
1998                         txr->watchdog_check = FALSE;
1999         }
2000 }
2001
2002 /*********************************************************************
2003  *
2004  *  This routine disables all traffic on the adapter by issuing a
2005  *  global reset on the MAC and deallocates TX/RX buffers.
2006  *
2007  **********************************************************************/
2008
2009 static void
2010 igb_stop(void *arg)
2011 {
2012         struct adapter  *adapter = arg;
2013         struct ifnet    *ifp = adapter->ifp;
2014         struct tx_ring *txr = adapter->tx_rings;
2015
2016         IGB_CORE_LOCK_ASSERT(adapter);
2017
2018         INIT_DEBUGOUT("igb_stop: begin");
2019
2020         igb_disable_intr(adapter);
2021
2022         callout_stop(&adapter->timer);
2023
2024         /* Tell the stack that the interface is no longer active */
2025         ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
2026
2027         /* Unarm watchdog timer. */
2028         for (int i = 0; i < adapter->num_queues; i++, txr++) {
2029                 IGB_TX_LOCK(txr);
2030                 txr->watchdog_check = FALSE;
2031                 IGB_TX_UNLOCK(txr);
2032         }
2033
2034         e1000_reset_hw(&adapter->hw);
2035         E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2036 }
2037
2038
2039 /*********************************************************************
2040  *
2041  *  Determine hardware revision.
2042  *
2043  **********************************************************************/
2044 static void
2045 igb_identify_hardware(struct adapter *adapter)
2046 {
2047         device_t dev = adapter->dev;
2048
2049         /* Make sure our PCI config space has the necessary stuff set */
2050         adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2051         if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2052             (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2053                 device_printf(dev, "Memory Access and/or Bus Master bits "
2054                     "were not set!\n");
2055                 adapter->hw.bus.pci_cmd_word |=
2056                 (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2057                 pci_write_config(dev, PCIR_COMMAND,
2058                     adapter->hw.bus.pci_cmd_word, 2);
2059         }
2060
2061         /* Save off the information about this board */
2062         adapter->hw.vendor_id = pci_get_vendor(dev);
2063         adapter->hw.device_id = pci_get_device(dev);
2064         adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2065         adapter->hw.subsystem_vendor_id =
2066             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2067         adapter->hw.subsystem_device_id =
2068             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2069
2070         /* Do Shared Code Init and Setup */
2071         if (e1000_set_mac_type(&adapter->hw)) {
2072                 device_printf(dev, "Setup init failure\n");
2073                 return;
2074         }
2075 }
2076
2077 static int
2078 igb_allocate_pci_resources(struct adapter *adapter)
2079 {
2080         device_t        dev = adapter->dev;
2081         int             rid;
2082
2083         rid = PCIR_BAR(0);
2084         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2085             &rid, RF_ACTIVE);
2086         if (adapter->pci_mem == NULL) {
2087                 device_printf(dev, "Unable to allocate bus resource: memory\n");
2088                 return (ENXIO);
2089         }
2090         adapter->osdep.mem_bus_space_tag =
2091             rman_get_bustag(adapter->pci_mem);
2092         adapter->osdep.mem_bus_space_handle =
2093             rman_get_bushandle(adapter->pci_mem);
2094         adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2095
2096         adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2097
2098         /* This will setup either MSI/X or MSI */
2099         adapter->msix = igb_setup_msix(adapter);
2100         adapter->hw.back = &adapter->osdep;
2101
2102         return (0);
2103 }
2104
2105 /*********************************************************************
2106  *
2107  *  Setup the Legacy or MSI Interrupt handler
2108  *
2109  **********************************************************************/
2110 static int
2111 igb_allocate_legacy(struct adapter *adapter)
2112 {
2113         device_t dev = adapter->dev;
2114         int error, rid = 0;
2115
2116         /* Turn off all interrupts */
2117         E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2118
2119 #if 0
2120         /* MSI RID is 1 */
2121         if (adapter->msix == 1)
2122                 rid = 1;
2123 #endif
2124         rid = 0;
2125         /* We allocate a single interrupt resource */
2126         adapter->res = bus_alloc_resource_any(dev,
2127             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2128         if (adapter->res == NULL) {
2129                 device_printf(dev, "Unable to allocate bus resource: "
2130                     "interrupt\n");
2131                 return (ENXIO);
2132         }
2133
2134         /*
2135          * Try allocating a fast interrupt and the associated deferred
2136          * processing contexts.
2137          */
2138         TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2139         /* Make tasklet for deferred link handling */
2140         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2141         adapter->tq = taskqueue_create("igb_taskq", M_INTWAIT,
2142             taskqueue_thread_enqueue, &adapter->tq);
2143         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s taskq",
2144             device_get_nameunit(adapter->dev));
2145         if ((error = bus_setup_intr(dev, adapter->res,
2146             /*INTR_TYPE_NET |*/ INTR_MPSAFE, igb_irq_fast,
2147             adapter, &adapter->tag, NULL)) != 0) {
2148                 device_printf(dev, "Failed to register fast interrupt "
2149                             "handler: %d\n", error);
2150                 taskqueue_free(adapter->tq);
2151                 adapter->tq = NULL;
2152                 return (error);
2153         }
2154
2155         return (0);
2156 }
2157
2158
2159 /*********************************************************************
2160  *
2161  *  Setup the MSIX Queue Interrupt handlers: 
2162  *
2163  **********************************************************************/
2164 static int
2165 igb_allocate_msix(struct adapter *adapter)
2166 {
2167         device_t                dev = adapter->dev;
2168         struct igb_queue        *que = adapter->queues;
2169         int                     error, rid, vector = 0;
2170
2171
2172         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2173                 rid = vector + 1;
2174                 que->res = bus_alloc_resource_any(dev,
2175                     SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2176                 if (que->res == NULL) {
2177                         device_printf(dev,
2178                             "Unable to allocate bus resource: "
2179                             "MSIX Queue Interrupt\n");
2180                         return (ENXIO);
2181                 }
2182                 error = bus_setup_intr(dev, que->res,
2183                     /*INTR_TYPE_NET |*/ INTR_MPSAFE, 
2184                     igb_msix_que, que, &que->tag, NULL);
2185                 if (error) {
2186                         que->res = NULL;
2187                         device_printf(dev, "Failed to register Queue handler");
2188                         return (error);
2189                 }
2190                 que->msix = vector;
2191                 if (adapter->hw.mac.type == e1000_82575)
2192                         que->eims = E1000_EICR_TX_QUEUE0 << i;
2193                 else
2194                         que->eims = 1 << vector;
2195                 /*
2196                 ** Bind the msix vector, and thus the
2197                 ** rings to the corresponding cpu.
2198                 */
2199 #if 0
2200                 if (adapter->num_queues > 1)
2201                         bus_bind_intr(dev, que->res, i);
2202 #endif
2203                 /* Make tasklet for deferred handling */
2204                 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2205                 que->tq = taskqueue_create("igb_que", M_INTWAIT,
2206                     taskqueue_thread_enqueue, &que->tq);
2207                 taskqueue_start_threads(&que->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s que",
2208                     device_get_nameunit(adapter->dev));
2209         }
2210
2211         /* And Link */
2212         rid = vector + 1;
2213         adapter->res = bus_alloc_resource_any(dev,
2214             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2215         if (adapter->res == NULL) {
2216                 device_printf(dev,
2217                     "Unable to allocate bus resource: "
2218                     "MSIX Link Interrupt\n");
2219                 return (ENXIO);
2220         }
2221         if ((error = bus_setup_intr(dev, adapter->res,
2222             /*INTR_TYPE_NET |*/ INTR_MPSAFE,
2223             igb_msix_link, adapter, &adapter->tag, NULL)) != 0) {
2224                 device_printf(dev, "Failed to register Link handler");
2225                 return (error);
2226         }
2227         adapter->linkvec = vector;
2228
2229         /* Make tasklet for deferred handling */
2230         TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2231         adapter->tq = taskqueue_create("igb_link", M_INTWAIT,
2232             taskqueue_thread_enqueue, &adapter->tq);
2233         taskqueue_start_threads(&adapter->tq, 1, TDPRI_KERN_DAEMON /*PI_NET*/, -1, "%s link",
2234             device_get_nameunit(adapter->dev));
2235
2236         return (0);
2237 }
2238
2239
2240 static void
2241 igb_configure_queues(struct adapter *adapter)
2242 {
2243         struct  e1000_hw        *hw = &adapter->hw;
2244         struct  igb_queue       *que;
2245         u32                     tmp, ivar = 0;
2246         u32                     newitr = IGB_DEFAULT_ITR;
2247
2248         /* First turn on RSS capability */
2249         if (adapter->hw.mac.type > e1000_82575)
2250                 E1000_WRITE_REG(hw, E1000_GPIE,
2251                     E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2252                     E1000_GPIE_PBA | E1000_GPIE_NSICR);
2253
2254         /* Turn on MSIX */
2255         switch (adapter->hw.mac.type) {
2256         case e1000_82580:
2257                 /* RX entries */
2258                 for (int i = 0; i < adapter->num_queues; i++) {
2259                         u32 index = i >> 1;
2260                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2261                         que = &adapter->queues[i];
2262                         if (i & 1) {
2263                                 ivar &= 0xFF00FFFF;
2264                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2265                         } else {
2266                                 ivar &= 0xFFFFFF00;
2267                                 ivar |= que->msix | E1000_IVAR_VALID;
2268                         }
2269                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2270                 }
2271                 /* TX entries */
2272                 for (int i = 0; i < adapter->num_queues; i++) {
2273                         u32 index = i >> 1;
2274                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2275                         que = &adapter->queues[i];
2276                         if (i & 1) {
2277                                 ivar &= 0x00FFFFFF;
2278                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2279                         } else {
2280                                 ivar &= 0xFFFF00FF;
2281                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2282                         }
2283                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2284                         adapter->eims_mask |= que->eims;
2285                 }
2286
2287                 /* And for the link interrupt */
2288                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2289                 adapter->link_mask = 1 << adapter->linkvec;
2290                 adapter->eims_mask |= adapter->link_mask;
2291                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2292                 break;
2293         case e1000_82576:
2294                 /* RX entries */
2295                 for (int i = 0; i < adapter->num_queues; i++) {
2296                         u32 index = i & 0x7; /* Each IVAR has two entries */
2297                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2298                         que = &adapter->queues[i];
2299                         if (i < 8) {
2300                                 ivar &= 0xFFFFFF00;
2301                                 ivar |= que->msix | E1000_IVAR_VALID;
2302                         } else {
2303                                 ivar &= 0xFF00FFFF;
2304                                 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2305                         }
2306                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2307                         adapter->eims_mask |= que->eims;
2308                 }
2309                 /* TX entries */
2310                 for (int i = 0; i < adapter->num_queues; i++) {
2311                         u32 index = i & 0x7; /* Each IVAR has two entries */
2312                         ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2313                         que = &adapter->queues[i];
2314                         if (i < 8) {
2315                                 ivar &= 0xFFFF00FF;
2316                                 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2317                         } else {
2318                                 ivar &= 0x00FFFFFF;
2319                                 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2320                         }
2321                         E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2322                         adapter->eims_mask |= que->eims;
2323                 }
2324
2325                 /* And for the link interrupt */
2326                 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2327                 adapter->link_mask = 1 << adapter->linkvec;
2328                 adapter->eims_mask |= adapter->link_mask;
2329                 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2330                 break;
2331
2332         case e1000_82575:
2333                 /* enable MSI-X support*/
2334                 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2335                 tmp |= E1000_CTRL_EXT_PBA_CLR;
2336                 /* Auto-Mask interrupts upon ICR read. */
2337                 tmp |= E1000_CTRL_EXT_EIAME;
2338                 tmp |= E1000_CTRL_EXT_IRCA;
2339                 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2340
2341                 /* Queues */
2342                 for (int i = 0; i < adapter->num_queues; i++) {
2343                         que = &adapter->queues[i];
2344                         tmp = E1000_EICR_RX_QUEUE0 << i;
2345                         tmp |= E1000_EICR_TX_QUEUE0 << i;
2346                         que->eims = tmp;
2347                         E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2348                             i, que->eims);
2349                         adapter->eims_mask |= que->eims;
2350                 }
2351
2352                 /* Link */
2353                 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2354                     E1000_EIMS_OTHER);
2355                 adapter->link_mask |= E1000_EIMS_OTHER;
2356                 adapter->eims_mask |= adapter->link_mask;
2357         default:
2358                 break;
2359         }
2360
2361         /* Set the starting interrupt rate */
2362         if (hw->mac.type == e1000_82575)
2363                 newitr |= newitr << 16;
2364         else
2365                 newitr |= 0x8000000;
2366
2367         for (int i = 0; i < adapter->num_queues; i++) {
2368                 que = &adapter->queues[i];
2369                 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2370         }
2371
2372         return;
2373 }
2374
2375
2376 static void
2377 igb_free_pci_resources(struct adapter *adapter)
2378 {
2379         struct          igb_queue *que = adapter->queues;
2380         device_t        dev = adapter->dev;
2381         int             rid;
2382
2383         /*
2384         ** There is a slight possibility of a failure mode
2385         ** in attach that will result in entering this function
2386         ** before interrupt resources have been initialized, and
2387         ** in that case we do not want to execute the loops below
2388         ** We can detect this reliably by the state of the adapter
2389         ** res pointer.
2390         */
2391         if (adapter->res == NULL)
2392                 goto mem;
2393
2394         /*
2395          * First release all the interrupt resources:
2396          */
2397         for (int i = 0; i < adapter->num_queues; i++, que++) {
2398                 rid = que->msix + 1;
2399                 if (que->tag != NULL) {
2400                         bus_teardown_intr(dev, que->res, que->tag);
2401                         que->tag = NULL;
2402                 }
2403                 if (que->res != NULL)
2404                         bus_release_resource(dev,
2405                             SYS_RES_IRQ, rid, que->res);
2406         }
2407
2408         /* Clean the Legacy or Link interrupt last */
2409         if (adapter->linkvec) /* we are doing MSIX */
2410                 rid = adapter->linkvec + 1;
2411         else
2412                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2413
2414         if (adapter->tag != NULL) {
2415                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2416                 adapter->tag = NULL;
2417         }
2418         if (adapter->res != NULL)
2419                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2420
2421 mem:
2422         if (adapter->msix)
2423                 pci_release_msi(dev);
2424
2425         if (adapter->msix_mem != NULL)
2426                 bus_release_resource(dev, SYS_RES_MEMORY,
2427                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2428
2429         if (adapter->pci_mem != NULL)
2430                 bus_release_resource(dev, SYS_RES_MEMORY,
2431                     PCIR_BAR(0), adapter->pci_mem);
2432
2433 }
2434
2435 /*
2436  * Setup Either MSI/X or MSI
2437  */
2438 static int
2439 igb_setup_msix(struct adapter *adapter)
2440 {
2441         device_t dev = adapter->dev;
2442         int rid, want, queues, msgs;
2443
2444         /* tuneable override */
2445         if (igb_enable_msix == 0)
2446                 goto msi;
2447
2448         /* First try MSI/X */
2449         rid = PCIR_BAR(IGB_MSIX_BAR);
2450         adapter->msix_mem = bus_alloc_resource_any(dev,
2451             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2452         if (!adapter->msix_mem) {
2453                 /* May not be enabled */
2454                 device_printf(adapter->dev,
2455                     "Unable to map MSIX table \n");
2456                 goto msi;
2457         }
2458
2459         msgs = pci_msix_count(dev); 
2460         if (msgs == 0) { /* system has msix disabled */
2461                 bus_release_resource(dev, SYS_RES_MEMORY,
2462                     PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2463                 adapter->msix_mem = NULL;
2464                 goto msi;
2465         }
2466
2467         /* Figure out a reasonable auto config value */
2468         queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2469
2470         /* Can have max of 4 queues on 82575 */
2471         if (adapter->hw.mac.type == e1000_82575) {
2472                 if (queues > 4)
2473                         queues = 4;
2474                 if (igb_num_queues > 4)
2475                         igb_num_queues = 4;
2476         }
2477
2478         if (igb_num_queues == 0)
2479                 igb_num_queues = queues;
2480
2481         /*
2482         ** One vector (RX/TX pair) per queue
2483         ** plus an additional for Link interrupt
2484         */
2485         want = igb_num_queues + 1;
2486         if (msgs >= want)
2487                 msgs = want;
2488         else {
2489                 device_printf(adapter->dev,
2490                     "MSIX Configuration Problem, "
2491                     "%d vectors configured, but %d queues wanted!\n",
2492                     msgs, want);
2493                 return (ENXIO);
2494         }
2495         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2496                 device_printf(adapter->dev,
2497                     "Using MSIX interrupts with %d vectors\n", msgs);
2498                 adapter->num_queues = igb_num_queues;
2499                 return (msgs);
2500         }
2501 msi:
2502         msgs = pci_msi_count(dev);
2503         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2504                 device_printf(adapter->dev,"Using MSI interrupt\n");
2505         return (msgs);
2506 }
2507
2508 /*********************************************************************
2509  *
2510  *  Set up an fresh starting state
2511  *
2512  **********************************************************************/
2513 static void
2514 igb_reset(struct adapter *adapter)
2515 {
2516         device_t        dev = adapter->dev;
2517         struct e1000_hw *hw = &adapter->hw;
2518         struct e1000_fc_info *fc = &hw->fc;
2519         struct ifnet    *ifp = adapter->ifp;
2520         u32             pba = 0;
2521         u16             hwm;
2522
2523         INIT_DEBUGOUT("igb_reset: begin");
2524
2525         /* Let the firmware know the OS is in control */
2526         igb_get_hw_control(adapter);
2527
2528         /*
2529          * Packet Buffer Allocation (PBA)
2530          * Writing PBA sets the receive portion of the buffer
2531          * the remainder is used for the transmit buffer.
2532          */
2533         switch (hw->mac.type) {
2534         case e1000_82575:
2535                 pba = E1000_PBA_32K;
2536                 break;
2537         case e1000_82576:
2538                 pba = E1000_PBA_64K;
2539                 break;
2540         case e1000_82580:
2541                 pba = E1000_PBA_35K;
2542         default:
2543                 break;
2544         }
2545
2546         /* Special needs in case of Jumbo frames */
2547         if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2548                 u32 tx_space, min_tx, min_rx;
2549                 pba = E1000_READ_REG(hw, E1000_PBA);
2550                 tx_space = pba >> 16;
2551                 pba &= 0xffff;
2552                 min_tx = (adapter->max_frame_size +
2553                     sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2554                 min_tx = roundup2(min_tx, 1024);
2555                 min_tx >>= 10;
2556                 min_rx = adapter->max_frame_size;
2557                 min_rx = roundup2(min_rx, 1024);
2558                 min_rx >>= 10;
2559                 if (tx_space < min_tx &&
2560                     ((min_tx - tx_space) < pba)) {
2561                         pba = pba - (min_tx - tx_space);
2562                         /*
2563                          * if short on rx space, rx wins
2564                          * and must trump tx adjustment
2565                          */
2566                         if (pba < min_rx)
2567                                 pba = min_rx;
2568                 }
2569                 E1000_WRITE_REG(hw, E1000_PBA, pba);
2570         }
2571
2572         INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2573
2574         /*
2575          * These parameters control the automatic generation (Tx) and
2576          * response (Rx) to Ethernet PAUSE frames.
2577          * - High water mark should allow for at least two frames to be
2578          *   received after sending an XOFF.
2579          * - Low water mark works best when it is very near the high water mark.
2580          *   This allows the receiver to restart by sending XON when it has
2581          *   drained a bit.
2582          */
2583         hwm = min(((pba << 10) * 9 / 10),
2584             ((pba << 10) - 2 * adapter->max_frame_size));
2585
2586         if (hw->mac.type < e1000_82576) {
2587                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2588                 fc->low_water = fc->high_water - 8;
2589         } else {
2590                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2591                 fc->low_water = fc->high_water - 16;
2592         }
2593
2594         fc->pause_time = IGB_FC_PAUSE_TIME;
2595         fc->send_xon = TRUE;
2596
2597         /* Set Flow control, use the tunable location if sane */
2598         if ((igb_fc_setting >= 0) || (igb_fc_setting < 4))
2599                 fc->requested_mode = igb_fc_setting;
2600         else
2601                 fc->requested_mode = e1000_fc_none;
2602
2603         fc->current_mode = fc->requested_mode;
2604
2605         /* Issue a global reset */
2606         e1000_reset_hw(hw);
2607         E1000_WRITE_REG(hw, E1000_WUC, 0);
2608
2609         if (e1000_init_hw(hw) < 0)
2610                 device_printf(dev, "Hardware Initialization Failed\n");
2611
2612         if (hw->mac.type == e1000_82580) {
2613                 u32 reg;
2614
2615                 hwm = (pba << 10) - (2 * adapter->max_frame_size);
2616                 /*
2617                  * 0x80000000 - enable DMA COAL
2618                  * 0x10000000 - use L0s as low power
2619                  * 0x20000000 - use L1 as low power
2620                  * X << 16 - exit dma coal when rx data exceeds X kB
2621                  * Y - upper limit to stay in dma coal in units of 32usecs
2622                  */
2623                 E1000_WRITE_REG(hw, E1000_DMACR,
2624                     0xA0000006 | ((hwm << 6) & 0x00FF0000));
2625
2626                 /* set hwm to PBA -  2 * max frame size */
2627                 E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2628                 /*
2629                  * This sets the time to wait before requesting transition to
2630                  * low power state to number of usecs needed to receive 1 512
2631                  * byte frame at gigabit line rate
2632                  */
2633                 E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2634
2635                 /* free space in tx packet buffer to wake from DMA coal */
2636                 E1000_WRITE_REG(hw, E1000_DMCTXTH,
2637                     (20480 - (2 * adapter->max_frame_size)) >> 6);
2638
2639                 /* make low power state decision controlled by DMA coal */
2640                 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2641                 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2642                     reg | E1000_PCIEMISC_LX_DECISION);
2643         }
2644
2645         E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2646         e1000_get_phy_info(hw);
2647         e1000_check_for_link(hw);
2648         return;
2649 }
2650
2651 /*********************************************************************
2652  *
2653  *  Setup networking device structure and register an interface.
2654  *
2655  **********************************************************************/
2656 static void
2657 igb_setup_interface(device_t dev, struct adapter *adapter)
2658 {
2659         struct ifnet   *ifp;
2660
2661         INIT_DEBUGOUT("igb_setup_interface: begin");
2662
2663         ifp = adapter->ifp = &adapter->arpcom.ac_if;
2664         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2665         ifp->if_mtu = ETHERMTU;
2666         ifp->if_init =  igb_init;
2667         ifp->if_softc = adapter;
2668         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2669         ifp->if_ioctl = igb_ioctl;
2670         ifp->if_start = igb_start;
2671 #ifdef DEVICE_POLLING
2672         ifp->if_poll = igb_poll;
2673 #endif
2674 #if __FreeBSD_version >= 800000
2675         ifp->if_transmit = igb_mq_start;
2676         ifp->if_qflush = igb_qflush;
2677 #endif
2678         ifq_set_maxlen(&ifp->if_snd, adapter->num_tx_desc - 1);
2679         ifq_set_ready(&ifp->if_snd);
2680
2681         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2682
2683         ifp->if_capabilities = ifp->if_capenable = 0;
2684
2685         ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2686 #ifdef NET_TSO
2687         ifp->if_capabilities |= IFCAP_TSO4;
2688 #endif
2689         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2690 #ifdef NET_LRO
2691         if (igb_header_split)
2692                 ifp->if_capabilities |= IFCAP_LRO;
2693 #endif
2694
2695         ifp->if_capenable = ifp->if_capabilities;
2696
2697         /*
2698          * Tell the upper layer(s) we support long frames.
2699          */
2700         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2701         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2702         ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2703
2704         /*
2705          * Specify the media types supported by this adapter and register
2706          * callbacks to update media and link information
2707          */
2708         ifmedia_init(&adapter->media, IFM_IMASK,
2709             igb_media_change, igb_media_status);
2710         if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2711             (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2712                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 
2713                             0, NULL);
2714                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2715         } else {
2716                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2717                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2718                             0, NULL);
2719                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2720                             0, NULL);
2721                 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2722                             0, NULL);
2723                 if (adapter->hw.phy.type != e1000_phy_ife) {
2724                         ifmedia_add(&adapter->media,
2725                                 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2726                         ifmedia_add(&adapter->media,
2727                                 IFM_ETHER | IFM_1000_T, 0, NULL);
2728                 }
2729         }
2730         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2731         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2732 }
2733
2734
2735 /*
2736  * Manage DMA'able memory.
2737  */
2738 static void
2739 igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2740 {
2741         if (error)
2742                 return;
2743         *(bus_addr_t *) arg = segs[0].ds_addr;
2744 }
2745
2746 static int
2747 igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2748         struct igb_dma_alloc *dma, int mapflags)
2749 {
2750         int error;
2751
2752         error = bus_dma_tag_create(NULL,                /* parent */
2753                                 IGB_DBA_ALIGN, 0,       /* alignment, bounds */
2754                                 BUS_SPACE_MAXADDR,      /* lowaddr */
2755                                 BUS_SPACE_MAXADDR,      /* highaddr */
2756                                 NULL, NULL,             /* filter, filterarg */
2757                                 size,                   /* maxsize */
2758                                 1,                      /* nsegments */
2759                                 size,                   /* maxsegsize */
2760                                 0,                      /* flags */
2761                                 &dma->dma_tag);
2762         if (error) {
2763                 device_printf(adapter->dev,
2764                     "%s: bus_dma_tag_create failed: %d\n",
2765                     __func__, error);
2766                 goto fail_0;
2767         }
2768
2769         error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2770             BUS_DMA_NOWAIT, &dma->dma_map);
2771         if (error) {
2772                 device_printf(adapter->dev,
2773                     "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2774                     __func__, (uintmax_t)size, error);
2775                 goto fail_2;
2776         }
2777
2778         dma->dma_paddr = 0;
2779         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2780             size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2781         if (error || dma->dma_paddr == 0) {
2782                 device_printf(adapter->dev,
2783                     "%s: bus_dmamap_load failed: %d\n",
2784                     __func__, error);
2785                 goto fail_3;
2786         }
2787
2788         return (0);
2789
2790 fail_3:
2791         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2792 fail_2:
2793         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2794         bus_dma_tag_destroy(dma->dma_tag);
2795 fail_0:
2796         dma->dma_map = NULL;
2797         dma->dma_tag = NULL;
2798
2799         return (error);
2800 }
2801
2802 static void
2803 igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2804 {
2805         if (dma->dma_tag == NULL)
2806                 return;
2807         if (dma->dma_map != NULL) {
2808                 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2809                     BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2810                 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2811                 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2812                 dma->dma_map = NULL;
2813         }
2814         bus_dma_tag_destroy(dma->dma_tag);
2815         dma->dma_tag = NULL;
2816 }
2817
2818
2819 /*********************************************************************
2820  *
2821  *  Allocate memory for the transmit and receive rings, and then
2822  *  the descriptors associated with each, called only once at attach.
2823  *
2824  **********************************************************************/
2825 static int
2826 igb_allocate_queues(struct adapter *adapter)
2827 {
2828         device_t dev = adapter->dev;
2829         struct igb_queue        *que = NULL;
2830         struct tx_ring          *txr = NULL;
2831         struct rx_ring          *rxr = NULL;
2832         int rsize, tsize, error = E1000_SUCCESS;
2833         int txconf = 0, rxconf = 0;
2834
2835         /* First allocate the top level queue structs */
2836         if (!(adapter->queues =
2837             (struct igb_queue *) kmalloc(sizeof(struct igb_queue) *
2838             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2839                 device_printf(dev, "Unable to allocate queue memory\n");
2840                 error = ENOMEM;
2841                 goto fail;
2842         }
2843
2844         /* Next allocate the TX ring struct memory */
2845         if (!(adapter->tx_rings =
2846             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2847             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2848                 device_printf(dev, "Unable to allocate TX ring memory\n");
2849                 error = ENOMEM;
2850                 goto tx_fail;
2851         }
2852
2853         /* Now allocate the RX */
2854         if (!(adapter->rx_rings =
2855             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2856             adapter->num_queues, M_DEVBUF, M_INTWAIT | M_ZERO))) {
2857                 device_printf(dev, "Unable to allocate RX ring memory\n");
2858                 error = ENOMEM;
2859                 goto rx_fail;
2860         }
2861
2862         tsize = roundup2(adapter->num_tx_desc *
2863             sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2864         /*
2865          * Now set up the TX queues, txconf is needed to handle the
2866          * possibility that things fail midcourse and we need to
2867          * undo memory gracefully
2868          */ 
2869         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2870                 /* Set up some basics */
2871                 txr = &adapter->tx_rings[i];
2872                 txr->adapter = adapter;
2873                 txr->me = i;
2874
2875                 /* Initialize the TX lock */
2876                 ksnprintf(txr->spin_name, sizeof(txr->spin_name), "%s:tx(%d)",
2877                     device_get_nameunit(dev), txr->me);
2878
2879                 IGB_TX_LOCK_INIT(txr);
2880
2881                 if (igb_dma_malloc(adapter, tsize,
2882                         &txr->txdma, BUS_DMA_NOWAIT)) {
2883                         device_printf(dev,
2884                             "Unable to allocate TX Descriptor memory\n");
2885                         error = ENOMEM;
2886                         goto err_tx_desc;
2887                 }
2888                 txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2889                 bzero((void *)txr->tx_base, tsize);
2890
2891                 /* Now allocate transmit buffers for the ring */
2892                 if (igb_allocate_transmit_buffers(txr)) {
2893                         device_printf(dev,
2894                             "Critical Failure setting up transmit buffers\n");
2895                         error = ENOMEM;
2896                         goto err_tx_desc;
2897                 }
2898 #if __FreeBSD_version >= 800000
2899                 /* Allocate a buf ring */
2900                 txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2901                     M_WAITOK, &txr->tx_mtx);
2902 #endif
2903         }
2904
2905         /*
2906          * Next the RX queues...
2907          */ 
2908         rsize = roundup2(adapter->num_rx_desc *
2909             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2910         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2911                 rxr = &adapter->rx_rings[i];
2912                 rxr->adapter = adapter;
2913                 rxr->me = i;
2914
2915                 /* Initialize the RX lock */
2916                 ksnprintf(rxr->spin_name, sizeof(rxr->spin_name), "%s:rx(%d)",
2917                     device_get_nameunit(dev), txr->me);
2918
2919                 IGB_RX_LOCK_INIT(rxr);
2920
2921                 if (igb_dma_malloc(adapter, rsize,
2922                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2923                         device_printf(dev,
2924                             "Unable to allocate RxDescriptor memory\n");
2925                         error = ENOMEM;
2926                         goto err_rx_desc;
2927                 }
2928                 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2929                 bzero((void *)rxr->rx_base, rsize);
2930
2931                 /* Allocate receive buffers for the ring*/
2932                 if (igb_allocate_receive_buffers(rxr)) {
2933                         device_printf(dev,
2934                             "Critical Failure setting up receive buffers\n");
2935                         error = ENOMEM;
2936                         goto err_rx_desc;
2937                 }
2938         }
2939
2940         /*
2941         ** Finally set up the queue holding structs
2942         */
2943         for (int i = 0; i < adapter->num_queues; i++) {
2944                 que = &adapter->queues[i];
2945                 que->adapter = adapter;
2946                 que->txr = &adapter->tx_rings[i];
2947                 que->rxr = &adapter->rx_rings[i];
2948         }
2949
2950         return (0);
2951
2952 err_rx_desc:
2953         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2954                 igb_dma_free(adapter, &rxr->rxdma);
2955 err_tx_desc:
2956         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2957                 igb_dma_free(adapter, &txr->txdma);
2958         kfree(adapter->rx_rings, M_DEVBUF);
2959 rx_fail:
2960 #if __FreeBSD_version >= 800000
2961         buf_ring_free(txr->br, M_DEVBUF);
2962 #endif
2963         kfree(adapter->tx_rings, M_DEVBUF);
2964 tx_fail:
2965         kfree(adapter->queues, M_DEVBUF);
2966 fail:
2967         return (error);
2968 }
2969
2970 /*********************************************************************
2971  *
2972  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2973  *  the information needed to transmit a packet on the wire. This is
2974  *  called only once at attach, setup is done every reset.
2975  *
2976  **********************************************************************/
2977 static int
2978 igb_allocate_transmit_buffers(struct tx_ring *txr)
2979 {
2980         struct adapter *adapter = txr->adapter;
2981         device_t dev = adapter->dev;
2982         struct igb_tx_buffer *txbuf;
2983         int error, i;
2984
2985         /*
2986          * Setup DMA descriptor areas.
2987          */
2988         if ((error = bus_dma_tag_create(NULL,
2989                                1, 0,                    /* alignment, bounds */
2990                                BUS_SPACE_MAXADDR,       /* lowaddr */
2991                                BUS_SPACE_MAXADDR,       /* highaddr */
2992                                NULL, NULL,              /* filter, filterarg */
2993                                IGB_TSO_SIZE,            /* maxsize */
2994                                IGB_MAX_SCATTER,         /* nsegments */
2995                                PAGE_SIZE,               /* maxsegsize */
2996                                0,                       /* flags */
2997                                &txr->txtag))) {
2998                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2999                 goto fail;
3000         }
3001
3002         if (!(txr->tx_buffers =
3003             (struct igb_tx_buffer *) kmalloc(sizeof(struct igb_tx_buffer) *
3004             adapter->num_tx_desc, M_DEVBUF, M_INTWAIT | M_ZERO))) {
3005                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3006                 error = ENOMEM;
3007                 goto fail;
3008         }
3009
3010         /* Create the descriptor buffer dma maps */
3011         txbuf = txr->tx_buffers;
3012         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3013                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3014                 if (error != 0) {
3015                         device_printf(dev, "Unable to create TX DMA map\n");
3016                         goto fail;
3017                 }
3018         }
3019
3020         return 0;
3021 fail:
3022         /* We free all, it handles case where we are in the middle */
3023         igb_free_transmit_structures(adapter);
3024         return (error);
3025 }
3026
3027 /*********************************************************************
3028  *
3029  *  Initialize a transmit ring.
3030  *
3031  **********************************************************************/
3032 static void
3033 igb_setup_transmit_ring(struct tx_ring *txr)
3034 {
3035         struct adapter *adapter = txr->adapter;
3036         struct igb_tx_buffer *txbuf;
3037         int i;
3038
3039         /* Clear the old descriptor contents */
3040         IGB_TX_LOCK(txr);
3041         bzero((void *)txr->tx_base,
3042               (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3043         /* Reset indices */
3044         txr->next_avail_desc = 0;
3045         txr->next_to_clean = 0;
3046
3047         /* Free any existing tx buffers. */
3048         txbuf = txr->tx_buffers;
3049         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3050                 if (txbuf->m_head != NULL) {
3051                         bus_dmamap_sync(txr->txtag, txbuf->map,
3052                             BUS_DMASYNC_POSTWRITE);
3053                         bus_dmamap_unload(txr->txtag, txbuf->map);
3054                         m_freem(txbuf->m_head);
3055                         txbuf->m_head = NULL;
3056                 }
3057                 /* clear the watch index */
3058                 txbuf->next_eop = -1;
3059         }
3060
3061         /* Set number of descriptors available */
3062         txr->tx_avail = adapter->num_tx_desc;
3063
3064         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3065             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3066         IGB_TX_UNLOCK(txr);
3067 }
3068
3069 /*********************************************************************
3070  *
3071  *  Initialize all transmit rings.
3072  *
3073  **********************************************************************/
3074 static void
3075 igb_setup_transmit_structures(struct adapter *adapter)
3076 {
3077         struct tx_ring *txr = adapter->tx_rings;
3078
3079         for (int i = 0; i < adapter->num_queues; i++, txr++)
3080                 igb_setup_transmit_ring(txr);
3081
3082         return;
3083 }
3084
3085 /*********************************************************************
3086  *
3087  *  Enable transmit unit.
3088  *
3089  **********************************************************************/
3090 static void
3091 igb_initialize_transmit_units(struct adapter *adapter)
3092 {
3093         struct tx_ring  *txr = adapter->tx_rings;
3094         struct e1000_hw *hw = &adapter->hw;
3095         u32             tctl, txdctl;
3096
3097         INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3098
3099         /* Setup the Tx Descriptor Rings */
3100         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3101                 u64 bus_addr = txr->txdma.dma_paddr;
3102
3103                 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3104                     adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3105                 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3106                     (uint32_t)(bus_addr >> 32));
3107                 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3108                     (uint32_t)bus_addr);
3109
3110                 /* Setup the HW Tx Head and Tail descriptor pointers */
3111                 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3112                 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3113
3114                 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3115                     E1000_READ_REG(hw, E1000_TDBAL(i)),
3116                     E1000_READ_REG(hw, E1000_TDLEN(i)));
3117
3118                 txr->watchdog_check = FALSE;
3119
3120                 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
3121                 txdctl |= IGB_TX_PTHRESH;
3122                 txdctl |= IGB_TX_HTHRESH << 8;
3123                 txdctl |= IGB_TX_WTHRESH << 16;
3124                 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3125                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3126         }
3127
3128         /* Program the Transmit Control Register */
3129         tctl = E1000_READ_REG(hw, E1000_TCTL);
3130         tctl &= ~E1000_TCTL_CT;
3131         tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3132                    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3133
3134         e1000_config_collision_dist(hw);
3135
3136         /* This write will effectively turn on the transmit unit. */
3137         E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3138 }
3139
3140 /*********************************************************************
3141  *
3142  *  Free all transmit rings.
3143  *
3144  **********************************************************************/
3145 static void
3146 igb_free_transmit_structures(struct adapter *adapter)
3147 {
3148         struct tx_ring *txr = adapter->tx_rings;
3149
3150         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3151                 IGB_TX_LOCK(txr);
3152                 igb_free_transmit_buffers(txr);
3153                 igb_dma_free(adapter, &txr->txdma);
3154                 IGB_TX_UNLOCK(txr);
3155                 IGB_TX_LOCK_DESTROY(txr);
3156         }
3157         kfree(adapter->tx_rings, M_DEVBUF);
3158 }
3159
3160 /*********************************************************************
3161  *
3162  *  Free transmit ring related data structures.
3163  *
3164  **********************************************************************/
3165 static void
3166 igb_free_transmit_buffers(struct tx_ring *txr)
3167 {
3168         struct adapter *adapter = txr->adapter;
3169         struct igb_tx_buffer *tx_buffer;
3170         int             i;
3171
3172         INIT_DEBUGOUT("free_transmit_ring: begin");
3173
3174         if (txr->tx_buffers == NULL)
3175                 return;
3176
3177         tx_buffer = txr->tx_buffers;
3178         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3179                 if (tx_buffer->m_head != NULL) {
3180                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3181                             BUS_DMASYNC_POSTWRITE);
3182                         bus_dmamap_unload(txr->txtag,
3183                             tx_buffer->map);
3184                         m_freem(tx_buffer->m_head);
3185                         tx_buffer->m_head = NULL;
3186                         if (tx_buffer->map != NULL) {
3187                                 bus_dmamap_destroy(txr->txtag,
3188                                     tx_buffer->map);
3189                                 tx_buffer->map = NULL;
3190                         }
3191                 } else if (tx_buffer->map != NULL) {
3192                         bus_dmamap_unload(txr->txtag,
3193                             tx_buffer->map);
3194                         bus_dmamap_destroy(txr->txtag,
3195                             tx_buffer->map);
3196                         tx_buffer->map = NULL;
3197                 }
3198         }
3199 #if __FreeBSD_version >= 800000
3200         if (txr->br != NULL)
3201                 buf_ring_free(txr->br, M_DEVBUF);
3202 #endif
3203         if (txr->tx_buffers != NULL) {
3204                 kfree(txr->tx_buffers, M_DEVBUF);
3205                 txr->tx_buffers = NULL;
3206         }
3207         if (txr->txtag != NULL) {
3208                 bus_dma_tag_destroy(txr->txtag);
3209                 txr->txtag = NULL;
3210         }
3211         return;
3212 }
3213
/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO)
 *
 *  Writes an advanced context descriptor at txr->next_avail_desc that
 *  describes the Ethernet/IPv4/TCP header layout of 'mp'.  The packet
 *  itself is modified: ip_sum is zeroed and th_sum is seeded with the
 *  pseudo-header checksum.
 *
 *  txr    - transmit ring the frame is being queued on
 *  mp     - frame to transmit; all headers must be in the first mbuf
 *  hdrlen - out: combined Ethernet + IP + TCP header length, used by
 *           the caller when filling in the data descriptors
 *
 *  Returns TRUE when the context descriptor has been written (one
 *  descriptor is consumed).  Returns FALSE when the frame is not
 *  IPv4/TCP or its headers are not contiguous in the first mbuf; in
 *  that case neither the ring nor the packet has been touched.
 *
 **********************************************************************/
#ifdef NET_TSO
static boolean_t
igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
{
	struct adapter *adapter = txr->adapter;
	struct e1000_adv_tx_context_desc *TXD;
	struct igb_tx_buffer *tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0;
	u16 etype, vtag = 0;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct tcphdr *th;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/*
	 * Only IPv4 is supported for now.  Reject anything else before
	 * the header is interpreted (and rewritten) as a struct ip.
	 */
	if (etype != ETHERTYPE_IP)
		return FALSE;

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
		return FALSE;

	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;	/* 0 */

	/*
	 * The check above only covers an option-less IP header.  Re-check
	 * with the real header length so the TCP header we are about to
	 * write into is known to live inside this mbuf.
	 */
	ip_hlen = ip->ip_hl << 2;
	if (ip_hlen < (int)sizeof(struct ip) ||
	    mp->m_len < ehdrlen + ip_hlen + (int)sizeof(struct tcphdr))
		return FALSE;

	/* All validation passed; from here on the packet and ring change. */
	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	ip->ip_sum = 0;
	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
	th->th_sum = in_pseudo(ip->ip_src.s_addr,
	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	tcp_hlen = th->th_off << 2;
	/*
	 * Calculate header length, this is used
	 * in the transmit desc in igb_xmit
	 */
	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
	/* 82575 needs the queue index added */
	if (adapter->hw.mac.type == e1000_82575)
		mss_l4len_idx |= txr->me << 4;
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);

	/* A context descriptor carries no mbuf and ends no packet. */
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	return TRUE;
}
#endif
3305
3306 /*********************************************************************
3307  *
3308  *  Context Descriptor setup for VLAN or CSUM
3309  *
3310  **********************************************************************/
3311
3312 static bool
3313 igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3314 {
3315         struct adapter *adapter = txr->adapter;
3316         struct e1000_adv_tx_context_desc *TXD;
3317         struct igb_tx_buffer        *tx_buffer;
3318         u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3319         struct ether_vlan_header *eh;
3320         struct ip *ip = NULL;
3321         struct ip6_hdr *ip6;
3322         int  ehdrlen, ctxd, ip_hlen = 0;
3323         u16     etype, vtag = 0;
3324         u8      ipproto = 0;
3325         bool    offload = TRUE;
3326
3327         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3328                 offload = FALSE;
3329
3330         vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3331         ctxd = txr->next_avail_desc;
3332         tx_buffer = &txr->tx_buffers[ctxd];
3333         TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3334
3335         /*
3336         ** In advanced descriptors the vlan tag must 
3337         ** be placed into the context descriptor, thus
3338         ** we need to be here just for that setup.
3339         */
3340         if (mp->m_flags & M_VLANTAG) {
3341                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3342                 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3343         } else if (offload == FALSE)
3344                 return FALSE;
3345
3346         /*
3347          * Determine where frame payload starts.
3348          * Jump over vlan headers if already present,
3349          * helpful for QinQ too.
3350          */
3351         eh = mtod(mp, struct ether_vlan_header *);
3352         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3353                 etype = ntohs(eh->evl_proto);
3354                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3355         } else {
3356                 etype = ntohs(eh->evl_encap_proto);
3357                 ehdrlen = ETHER_HDR_LEN;
3358         }
3359
3360         /* Set the ether header length */
3361         vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3362
3363         switch (etype) {
3364                 case ETHERTYPE_IP:
3365                         ip = (struct ip *)(mp->m_data + ehdrlen);
3366                         ip_hlen = ip->ip_hl << 2;
3367                         if (mp->m_len < ehdrlen + ip_hlen) {
3368                                 offload = FALSE;
3369                                 break;
3370                         }
3371                         ipproto = ip->ip_p;
3372                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3373                         break;
3374                 case ETHERTYPE_IPV6:
3375                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3376                         ip_hlen = sizeof(struct ip6_hdr);
3377                         if (mp->m_len < ehdrlen + ip_hlen)
3378                                 return (FALSE);
3379                         ipproto = ip6->ip6_nxt;
3380                         type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3381                         break;
3382                 default:
3383                         offload = FALSE;
3384                         break;
3385         }
3386
3387         vlan_macip_lens |= ip_hlen;
3388         type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3389
3390         switch (ipproto) {
3391                 case IPPROTO_TCP:
3392                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3393                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3394                         break;
3395                 case IPPROTO_UDP:
3396                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3397                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3398                         break;
3399 #if __FreeBSD_version >= 800000
3400                 case IPPROTO_SCTP:
3401                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3402                                 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3403                         break;
3404 #endif
3405                 default:
3406                         offload = FALSE;
3407                         break;
3408         }
3409
3410         /* 82575 needs the queue index added */
3411         if (adapter->hw.mac.type == e1000_82575)
3412                 mss_l4len_idx = txr->me << 4;
3413
3414         /* Now copy bits into descriptor */
3415         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3416         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3417         TXD->seqnum_seed = htole32(0);
3418         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3419
3420         tx_buffer->m_head = NULL;
3421         tx_buffer->next_eop = -1;
3422
3423         /* We've consumed the first desc, adjust counters */
3424         if (++ctxd == adapter->num_tx_desc)
3425                 ctxd = 0;
3426         txr->next_avail_desc = ctxd;
3427         --txr->tx_avail;
3428
3429         return (offload);
3430 }
3431
3432
3433 /**********************************************************************
3434  *
3435  *  Examine each tx_buffer in the used queue. If the hardware is done
3436  *  processing the packet then free associated resources. The
3437  *  tx_buffer is put back on the free queue.
3438  *
3439  *  TRUE return means there's work in the ring to clean, FALSE its empty.
3440  **********************************************************************/
3441 static bool
3442 igb_txeof(struct tx_ring *txr)
3443 {
3444         struct adapter  *adapter = txr->adapter;
3445         int first, last, done;
3446         struct igb_tx_buffer *tx_buffer;
3447         struct e1000_tx_desc   *tx_desc, *eop_desc;
3448         struct ifnet   *ifp = adapter->ifp;
3449
3450         IGB_TX_LOCK_ASSERT(txr);
3451
3452         if (txr->tx_avail == adapter->num_tx_desc)
3453                 return FALSE;
3454
3455         first = txr->next_to_clean;
3456         tx_desc = &txr->tx_base[first];
3457         tx_buffer = &txr->tx_buffers[first];
3458         last = tx_buffer->next_eop;
3459         eop_desc = &txr->tx_base[last];
3460
3461         /*
3462          * What this does is get the index of the
3463          * first descriptor AFTER the EOP of the 
3464          * first packet, that way we can do the
3465          * simple comparison on the inner while loop.
3466          */
3467         if (++last == adapter->num_tx_desc)
3468                 last = 0;
3469         done = last;
3470
3471         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3472             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3473
3474         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3475                 /* We clean the range of the packet */
3476                 while (first != done) {
3477                         tx_desc->upper.data = 0;
3478                         tx_desc->lower.data = 0;
3479                         tx_desc->buffer_addr = 0;
3480                         ++txr->tx_avail;
3481
3482                         if (tx_buffer->m_head) {
3483                                 txr->bytes +=
3484                                     tx_buffer->m_head->m_pkthdr.len;
3485                                 bus_dmamap_sync(txr->txtag,
3486                                     tx_buffer->map,
3487                                     BUS_DMASYNC_POSTWRITE);
3488                                 bus_dmamap_unload(txr->txtag,
3489                                     tx_buffer->map);
3490
3491                                 m_freem(tx_buffer->m_head);
3492                                 tx_buffer->m_head = NULL;
3493                         }
3494                         tx_buffer->next_eop = -1;
3495                         txr->watchdog_time = ticks;
3496
3497                         if (++first == adapter->num_tx_desc)
3498                                 first = 0;
3499
3500                         tx_buffer = &txr->tx_buffers[first];
3501                         tx_desc = &txr->tx_base[first];
3502                 }
3503                 ++txr->packets;
3504                 ++ifp->if_opackets;
3505                 /* See if we can continue to the next packet */
3506                 last = tx_buffer->next_eop;
3507                 if (last != -1) {
3508                         eop_desc = &txr->tx_base[last];
3509                         /* Get new done point */
3510                         if (++last == adapter->num_tx_desc) last = 0;
3511                         done = last;
3512                 } else
3513                         break;
3514         }
3515         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3516             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3517
3518         txr->next_to_clean = first;
3519
3520         /*
3521          * If we have enough room, clear IFF_DRV_OACTIVE
3522          * to tell the stack that it is OK to send packets.
3523          */
3524         if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {                
3525                 ifp->if_flags &= ~IFF_OACTIVE;
3526                 /* All clean, turn off the watchdog */
3527                 if (txr->tx_avail == adapter->num_tx_desc) {
3528                         txr->watchdog_check = FALSE;
3529                         return FALSE;
3530                 }
3531         }
3532
3533         return (TRUE);
3534 }
3535
3536
3537 /*********************************************************************
3538  *
3539  *  Setup descriptor buffer(s) from system mbuf buffer pools.
3540  *              i - designates the ring index
3541  *              clean - tells the function whether to update
3542  *                      the header, the packet buffer, or both.
3543  *
3544  **********************************************************************/
3545 static int
3546 igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3547 {
3548         struct adapter          *adapter = rxr->adapter;
3549         struct igb_rx_buf       *rxbuf;
3550         struct mbuf             *mh, *mp;
3551         bus_dma_segment_t       hseg[1];
3552         bus_dma_segment_t       pseg[1];
3553         bus_dmamap_t            map;
3554         int                     nsegs, error;
3555         int                     mbflags;
3556
3557         /*
3558          * Init-time loads are allowed to use a blocking mbuf allocation,
3559          * otherwise the sheer number of mbufs allocated can lead to
3560          * failures.
3561          */
3562         mbflags = (clean & IGB_CLEAN_INITIAL) ? MB_WAIT : MB_DONTWAIT;
3563
3564         rxbuf = &rxr->rx_buffers[i];
3565         mh = mp = NULL;
3566         if ((clean & IGB_CLEAN_HEADER) != 0) {
3567                 mh = m_gethdr(mbflags, MT_DATA);
3568                 if (mh == NULL) {
3569                         adapter->mbuf_header_failed++;          
3570                         return (ENOBUFS);
3571                 }
3572                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3573                 /*
3574                  * Because IGB_HDR_BUF size is less than MHLEN
3575                  * and we configure controller to split headers
3576                  * we can align mbuf on ETHER_ALIGN boundary.
3577                  */
3578                 m_adj(mh, ETHER_ALIGN);
3579                 error = bus_dmamap_load_mbuf_segment(rxr->rx_htag,
3580                     rxr->rx_hspare_map, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3581                 if (error != 0) {
3582                         m_freem(mh);
3583                         return (error);
3584                 }
3585                 mh->m_flags &= ~M_PKTHDR;
3586         }
3587         if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3588                 mp = m_getl(adapter->rx_mbuf_sz, mbflags, MT_DATA,
3589                             M_PKTHDR, NULL);
3590 #if 0
3591                 mp = m_getjcl(MB_DONTWAIT, MT_DATA, M_PKTHDR,
3592                     adapter->rx_mbuf_sz);
3593 #endif
3594                 if (mp == NULL) {
3595                         if (mh != NULL) {
3596                                 adapter->mbuf_packet_failed++;          
3597                                 bus_dmamap_unload(rxr->rx_htag,
3598                                     rxbuf->head_map);
3599                                 mh->m_flags |= M_PKTHDR;
3600                                 m_freem(mh);
3601                         }
3602                         return (ENOBUFS);
3603                 }
3604                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3605                 error = bus_dmamap_load_mbuf_segment(rxr->rx_ptag,
3606                     rxr->rx_pspare_map, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3607                 if (error != 0) {
3608                         if (mh != NULL) {
3609                                 bus_dmamap_unload(rxr->rx_htag,
3610                                     rxbuf->head_map);
3611                                 mh->m_flags |= M_PKTHDR;
3612                                 m_freem(mh);
3613                         }
3614                         m_freem(mp);
3615                         return (error);
3616                 }
3617                 mp->m_flags &= ~M_PKTHDR;
3618         }
3619
3620         /* Loading new DMA maps complete, unload maps for received buffers. */
3621         if ((clean & IGB_CLEAN_HEADER) != 0 && rxbuf->m_head != NULL) {
3622                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3623                     BUS_DMASYNC_POSTREAD);
3624                 bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3625         }
3626         if ((clean & IGB_CLEAN_PAYLOAD) != 0 && rxbuf->m_pack != NULL) {
3627                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3628                     BUS_DMASYNC_POSTREAD);
3629                 bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3630         }
3631
3632         /* Reflect loaded dmamaps. */
3633         if ((clean & IGB_CLEAN_HEADER) != 0) {
3634                 map = rxbuf->head_map;
3635                 rxbuf->head_map = rxr->rx_hspare_map;
3636                 rxr->rx_hspare_map = map;
3637                 rxbuf->m_head = mh;
3638                 bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3639                     BUS_DMASYNC_PREREAD);
3640                 rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
3641         }
3642         if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3643                 map = rxbuf->pack_map;
3644                 rxbuf->pack_map = rxr->rx_pspare_map;
3645                 rxr->rx_pspare_map = map;
3646                 rxbuf->m_pack = mp;
3647                 bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3648                     BUS_DMASYNC_PREREAD);
3649                 rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
3650         }
3651
3652         return (0);
3653 }
3654
3655 /*********************************************************************
3656  *
3657  *  Allocate memory for rx_buffer structures. Since we use one
3658  *  rx_buffer per received packet, the maximum number of rx_buffer's
3659  *  that we'll need is equal to the number of receive descriptors
3660  *  that we've allocated.
3661  *
3662  **********************************************************************/
3663 static int
3664 igb_allocate_receive_buffers(struct rx_ring *rxr)
3665 {
3666         struct  adapter         *adapter = rxr->adapter;
3667         device_t                dev = adapter->dev;
3668         struct igb_rx_buf       *rxbuf;
3669         int                     i, bsize, error;
3670
3671         bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3672         if (!(rxr->rx_buffers =
3673             (struct igb_rx_buf *) kmalloc(bsize,
3674             M_DEVBUF, M_INTWAIT | M_ZERO))) {
3675                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3676                 error = ENOMEM;
3677                 goto fail;
3678         }
3679
3680         if ((error = bus_dma_tag_create(NULL,
3681                                    1, 0,                /* alignment, bounds */
3682                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3683                                    BUS_SPACE_MAXADDR,   /* highaddr */
3684                                    NULL, NULL,          /* filter, filterarg */
3685                                    MSIZE,               /* maxsize */
3686                                    1,                   /* nsegments */
3687                                    MSIZE,               /* maxsegsize */
3688                                    0,                   /* flags */
3689                                    &rxr->rx_htag))) {
3690                 device_printf(dev, "Unable to create RX DMA tag\n");
3691                 goto fail;
3692         }
3693
3694         if ((error = bus_dma_tag_create(NULL,
3695                                    1, 0,                /* alignment, bounds */
3696                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3697                                    BUS_SPACE_MAXADDR,   /* highaddr */
3698                                    NULL, NULL,          /* filter, filterarg */
3699                                    MJUMPAGESIZE,        /* maxsize */
3700                                    1,                   /* nsegments */
3701                                    MJUMPAGESIZE,        /* maxsegsize */
3702                                    0,                   /* flags */
3703                                    &rxr->rx_ptag))) {
3704                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3705                 goto fail;
3706         }
3707
3708         /* Create the spare maps (used by getbuf) */
3709         error = bus_dmamap_create(rxr->rx_htag, BUS_DMA_NOWAIT,
3710              &rxr->rx_hspare_map);
3711         if (error) {
3712                 device_printf(dev,
3713                     "%s: bus_dmamap_create header spare failed: %d\n",
3714                     __func__, error);
3715                 goto fail;
3716         }
3717         error = bus_dmamap_create(rxr->rx_ptag, BUS_DMA_NOWAIT,
3718              &rxr->rx_pspare_map);
3719         if (error) {
3720                 device_printf(dev,
3721                     "%s: bus_dmamap_create packet spare failed: %d\n",
3722                     __func__, error);
3723                 goto fail;
3724         }
3725
3726         for (i = 0; i < adapter->num_rx_desc; i++) {
3727                 rxbuf = &rxr->rx_buffers[i];
3728                 error = bus_dmamap_create(rxr->rx_htag,
3729                     BUS_DMA_NOWAIT, &rxbuf->head_map);
3730                 if (error) {
3731                         device_printf(dev,
3732                             "Unable to create RX head DMA maps\n");
3733                         goto fail;
3734                 }
3735                 error = bus_dmamap_create(rxr->rx_ptag,
3736                     BUS_DMA_NOWAIT, &rxbuf->pack_map);
3737                 if (error) {
3738                         device_printf(dev,
3739                             "Unable to create RX packet DMA maps\n");
3740                         goto fail;
3741                 }
3742         }
3743
3744         return (0);
3745
3746 fail:
3747         /* Frees all, but can handle partial completion */
3748         igb_free_receive_structures(adapter);
3749         return (error);
3750 }
3751
3752
3753 static void
3754 igb_free_receive_ring(struct rx_ring *rxr)
3755 {
3756         struct  adapter         *adapter;
3757         struct igb_rx_buf       *rxbuf;
3758         int i;
3759
3760         adapter = rxr->adapter;
3761         for (i = 0; i < adapter->num_rx_desc; i++) {
3762                 rxbuf = &rxr->rx_buffers[i];
3763                 if (rxbuf->m_head != NULL) {
3764                         bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3765                             BUS_DMASYNC_POSTREAD);
3766                         bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3767                         rxbuf->m_head->m_flags |= M_PKTHDR;
3768                         m_freem(rxbuf->m_head);
3769                 }
3770                 if (rxbuf->m_pack != NULL) {
3771                         bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3772                             BUS_DMASYNC_POSTREAD);
3773                         bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3774                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3775                         m_freem(rxbuf->m_pack);
3776                 }
3777                 rxbuf->m_head = NULL;
3778                 rxbuf->m_pack = NULL;
3779         }
3780 }
3781
3782
3783 /*********************************************************************
3784  *
3785  *  Initialize a receive ring and its buffers.
3786  *
3787  **********************************************************************/
3788 static int
3789 igb_setup_receive_ring(struct rx_ring *rxr)
3790 {
3791         struct  adapter         *adapter;
3792         struct  ifnet           *ifp;
3793         device_t                dev;
3794 #ifdef NET_LRO 
3795         struct lro_ctrl         *lro = &rxr->lro;
3796 #endif
3797         int                     j, rsize, error = 0;
3798
3799         adapter = rxr->adapter;
3800         dev = adapter->dev;
3801         ifp = adapter->ifp;
3802
3803         /* Clear the ring contents */
3804         IGB_RX_LOCK(rxr);
3805         rsize = roundup2(adapter->num_rx_desc *
3806             sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3807         bzero((void *)rxr->rx_base, rsize);
3808
3809         /*
3810         ** Free current RX buffer structures and their mbufs
3811         */
3812         igb_free_receive_ring(rxr);
3813
3814         /* Now replenish the ring mbufs */
3815         for (j = 0; j < adapter->num_rx_desc; j++) {
3816                 error = igb_get_buf(rxr, j, IGB_CLEAN_BOTH | IGB_CLEAN_INITIAL);
3817                 if (error)
3818                         goto fail;
3819         }
3820
3821         /* Setup our descriptor indices */
3822         rxr->next_to_check = 0;
3823         rxr->last_cleaned = 0;
3824         rxr->lro_enabled = FALSE;
3825
3826         if (igb_header_split)
3827                 rxr->hdr_split = TRUE;
3828 #if NET_LRO 
3829         else
3830                 ifp->if_capabilities &= ~IFCAP_LRO;
3831 #endif
3832
3833         rxr->fmp = NULL;
3834         rxr->lmp = NULL;
3835         rxr->discard = FALSE;
3836
3837         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3838             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3839
3840         /*
3841         ** Now set up the LRO interface, we
3842         ** also only do head split when LRO
3843         ** is enabled, since so often they
3844         ** are undesireable in similar setups.
3845         */
3846 #if NET_LRO 
3847         if (ifp->if_capenable & IFCAP_LRO) {
3848                 int err = tcp_lro_init(lro);
3849                 if (err) {
3850                         device_printf(dev, "LRO Initialization failed!\n");
3851                         goto fail;
3852                 }
3853                 INIT_DEBUGOUT("RX LRO Initialized\n");
3854                 rxr->lro_enabled = TRUE;
3855                 lro->ifp = adapter->ifp;
3856         }
3857 #endif
3858
3859         IGB_RX_UNLOCK(rxr);
3860         return (0);
3861
3862 fail:
3863         igb_free_receive_ring(rxr);
3864         IGB_RX_UNLOCK(rxr);
3865         return (error);
3866 }
3867
3868 /*********************************************************************
3869  *
3870  *  Initialize all receive rings.
3871  *
3872  **********************************************************************/
3873 static int
3874 igb_setup_receive_structures(struct adapter *adapter)
3875 {
3876         struct rx_ring *rxr = adapter->rx_rings;
3877         int i, j;
3878
3879         for (i = 0; i < adapter->num_queues; i++, rxr++)
3880                 if (igb_setup_receive_ring(rxr))
3881                         goto fail;
3882
3883         return (0);
3884 fail:
3885         /*
3886          * Free RX buffers allocated so far, we will only handle
3887          * the rings that completed, the failing case will have
3888          * cleaned up for itself. The value of 'i' will be the
3889          * failed ring so we must pre-decrement it.
3890          */
3891         rxr = adapter->rx_rings;
3892         for (--i; i > 0; i--, rxr++) {
3893                 for (j = 0; j < adapter->num_rx_desc; j++)
3894                         igb_free_receive_ring(rxr);
3895         }
3896
3897         return (ENOBUFS);
3898 }
3899
3900 /*********************************************************************
3901  *
3902  *  Enable receive unit.
3903  *
3904  **********************************************************************/
3905 static void
3906 igb_initialize_receive_units(struct adapter *adapter)
3907 {
3908         struct rx_ring  *rxr = adapter->rx_rings;
3909         struct ifnet    *ifp = adapter->ifp;
3910         struct e1000_hw *hw = &adapter->hw;
3911         u32             rctl, rxcsum, psize, srrctl = 0;
3912
3913         INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3914
3915         /*
3916          * Make sure receives are disabled while setting
3917          * up the descriptor ring
3918          */
3919         rctl = E1000_READ_REG(hw, E1000_RCTL);
3920         E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3921
3922         /*
3923         ** Set up for header split
3924         */
3925         if (rxr->hdr_split) {
3926                 /* Use a standard mbuf for the header */
3927                 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3928                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3929         } else