1 /******************************************************************************
2
3   Copyright (c) 2001-2012, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD: src/sys/dev/ixgbe/ixgbe.c,v 1.70 2012/07/05 20:51:44 jfv Exp $*/
34
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #include "ixgbe.h"
39
40 /*********************************************************************
41  *  Set this to one to display debug statistics
42  *********************************************************************/
43 int             ixgbe_display_debug_stats = 0;
44
45 /*********************************************************************
46  *  Driver version
47  *********************************************************************/
48 char ixgbe_driver_version[] = "2.4.8";
49
50 /*********************************************************************
51  *  PCI Device ID Table
52  *
53  *  Used by probe to select devices to load on
54  *  Last field stores an index into ixgbe_strings
55  *  Last entry must be all 0s
56  *
57  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
58  *********************************************************************/
59
60 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
61 {
62         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
63         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
64         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
65         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
66         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
67         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
68         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
69         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
70         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
71         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
72         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
73         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
74         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
75         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
76         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
77         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
78         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
79         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
80         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
81         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
82         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
83         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
84         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
85         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
86         /* required last entry */
87         {0, 0, 0, 0, 0}
88 };
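/*
 * Support for an additional controller is normally added by inserting
 * one more row before the terminating all-zero entry; a hypothetical
 * sketch (the device id macro below is illustrative only) would be:
 *
 *	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_NEW_PART, 0, 0, 0},
 */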
89
90 /*********************************************************************
91  *  Table of branding strings
92  *********************************************************************/
93
94 static char    *ixgbe_strings[] = {
95         "Intel(R) PRO/10GbE PCI-Express Network Driver"
96 };
97
98 /*********************************************************************
99  *  Function prototypes
100  *********************************************************************/
101 static int      ixgbe_probe(device_t);
102 static int      ixgbe_attach(device_t);
103 static int      ixgbe_detach(device_t);
104 static int      ixgbe_shutdown(device_t);
105 static void     ixgbe_start(struct ifnet *);
106 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
107 #if 0 /* __FreeBSD_version >= 800000 */
108 static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
109 static int      ixgbe_mq_start_locked(struct ifnet *,
110                     struct tx_ring *, struct mbuf *);
111 static void     ixgbe_qflush(struct ifnet *);
112 #endif
113 static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
114 static void     ixgbe_init(void *);
115 static void     ixgbe_init_locked(struct adapter *);
116 static void     ixgbe_stop(void *);
117 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
118 static int      ixgbe_media_change(struct ifnet *);
119 static void     ixgbe_identify_hardware(struct adapter *);
120 static int      ixgbe_allocate_pci_resources(struct adapter *);
121 static int      ixgbe_allocate_msix(struct adapter *);
122 static int      ixgbe_allocate_legacy(struct adapter *);
123 static int      ixgbe_allocate_queues(struct adapter *);
124 #if 0   /* HAVE_MSIX */
125 static int      ixgbe_setup_msix(struct adapter *);
126 #endif
127 static void     ixgbe_free_pci_resources(struct adapter *);
128 static void     ixgbe_local_timer(void *);
129 static int      ixgbe_setup_interface(device_t, struct adapter *);
130 static void     ixgbe_config_link(struct adapter *);
131
132 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
133 static int      ixgbe_setup_transmit_structures(struct adapter *);
134 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
135 static void     ixgbe_initialize_transmit_units(struct adapter *);
136 static void     ixgbe_free_transmit_structures(struct adapter *);
137 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
138
139 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
140 static int      ixgbe_setup_receive_structures(struct adapter *);
141 static int      ixgbe_setup_receive_ring(struct rx_ring *);
142 static void     ixgbe_initialize_receive_units(struct adapter *);
143 static void     ixgbe_free_receive_structures(struct adapter *);
144 static void     ixgbe_free_receive_buffers(struct rx_ring *);
145 #if 0   /* NET_LRO */
146 static void     ixgbe_setup_hw_rsc(struct rx_ring *);
147 #endif
148
149 static void     ixgbe_enable_intr(struct adapter *);
150 static void     ixgbe_disable_intr(struct adapter *);
151 static void     ixgbe_update_stats_counters(struct adapter *);
152 static bool     ixgbe_txeof(struct tx_ring *);
153 static bool     ixgbe_rxeof(struct ix_queue *, int);
154 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
155 static void     ixgbe_set_promisc(struct adapter *);
156 static void     ixgbe_set_multi(struct adapter *);
157 static void     ixgbe_update_link_status(struct adapter *);
158 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
159 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
160 static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
161 static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
162 static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
163 static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
164                     struct ixgbe_dma_alloc *, int);
165 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
166 static void     ixgbe_add_rx_process_limit(struct adapter *, const char *,
167                     const char *, int *, int);
168 static bool     ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *);
169 static bool     ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
170 static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
171 static void     ixgbe_configure_ivars(struct adapter *);
172 static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
173
174 static void     ixgbe_setup_vlan_hw_support(struct adapter *);
175 static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
176 static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);
177
178 static void     ixgbe_add_hw_stats(struct adapter *adapter);
179
180 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
181 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
182                     struct mbuf *, u32);
183
184 /* Support for pluggable optic modules */
185 static bool     ixgbe_sfp_probe(struct adapter *);
186 static void     ixgbe_setup_optics(struct adapter *);
187
188 /* Legacy (single vector) interrupt handler */
189 static void     ixgbe_legacy_irq(void *);
190
191 /* The MSI/X Interrupt handlers */
192 static void     ixgbe_msix_que(void *);
193 static void     ixgbe_msix_link(void *);
194
195 /* Deferred interrupt tasklets */
196 static void     ixgbe_handle_que(void *, int);
197 static void     ixgbe_handle_link(void *, int);
198 static void     ixgbe_handle_msf(void *, int);
199 static void     ixgbe_handle_mod(void *, int);
200
201 #ifdef IXGBE_FDIR
202 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
203 static void     ixgbe_reinit_fdir(void *, int);
204 #endif
205
206 /*********************************************************************
207  *  FreeBSD Device Interface Entry Points
208  *********************************************************************/
209
210 static device_method_t ixgbe_methods[] = {
211         /* Device interface */
212         DEVMETHOD(device_probe, ixgbe_probe),
213         DEVMETHOD(device_attach, ixgbe_attach),
214         DEVMETHOD(device_detach, ixgbe_detach),
215         DEVMETHOD(device_shutdown, ixgbe_shutdown),
216         {0, 0}
217 };
218
219 static driver_t ixgbe_driver = {
220         "ix", ixgbe_methods, sizeof(struct adapter),
221 };
222
223 devclass_t ixgbe_devclass;
224 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
225
226 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
227 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
228
229 /*
230 ** TUNEABLE PARAMETERS:
231 */
232
233 /*
234 ** AIM: Adaptive Interrupt Moderation
235 ** which means that the interrupt rate
236 ** is varied over time based on the
237 ** traffic for that interrupt vector
238 */
239 static int ixgbe_enable_aim = TRUE;
240 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
241
242 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
243 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
244
245 /* How many packets rxeof tries to clean at a time */
246 static int ixgbe_rx_process_limit = 128;
247 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
248
249 /*
250 ** Smart speed setting, default to on.
251 ** This only works as a compile option
252 ** right now, as it is applied during attach;
253 ** set this to 'ixgbe_smart_speed_off' to
254 ** disable.
255 */
256 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
257
258 static int ixgbe_msi_enable = 1;
259 TUNABLE_INT("hw.ixgbe.msi.enable", &ixgbe_msi_enable);
260
261 /*
262  * MSIX should be the default for best performance,
263  * but this allows it to be forced off for testing.
264  */
265 static int ixgbe_enable_msix = 1;
266 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
267
268 /*
269  * Header split: this causes the hardware to DMA
270  * the header into a separate mbuf from the payload.
271  * It can be a performance win in some workloads, but
272  * in others it actually hurts; it is off by default.
273  */
274 static int ixgbe_header_split = FALSE;
275 TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);
276
277 /*
278  * Number of queues: when set to 0 it
279  * autoconfigures based on the number
280  * of cpus, with a max of 8. This can
281  * be overridden manually here.
282  */
283 static int ixgbe_num_queues = 0;
284 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
285
286 /*
287 ** Number of TX descriptors per ring;
288 ** set higher than RX as this seems to
289 ** be the better performing choice.
290 */
291 static int ixgbe_txd = PERFORM_TXD;
292 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
293
294 /* Number of RX descriptors per ring */
295 static int ixgbe_rxd = PERFORM_RXD;
296 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
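/*
 * All of the TUNABLE_INT() knobs above are read from the kernel
 * environment when the module loads.  A minimal usage sketch (the
 * values are only examples) is to set them in /boot/loader.conf:
 *
 *	hw.ixgbe.num_queues="4"
 *	hw.ixgbe.rx_process_limit="256"
 *	hw.ixgbe.enable_aim="0"
 */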
297
298 /* Keep a running tab on ports for a sanity check */
299 static int ixgbe_total_ports;
300
301 #ifdef IXGBE_FDIR
302 /*
303 ** For Flow Director: this is the
304 ** number of TX packets we sample
305 ** for the filter pool; at the default
306 ** of 20, every 20th packet is probed.
307 **
308 ** This feature can be disabled by
309 ** setting this to 0.
310 */
311 static int atr_sample_rate = 20;
312 /*
313 ** Flow Director actually 'steals'
314 ** part of the packet buffer as its
315 ** filter pool; this variable controls
316 ** how much it uses:
317 **  0 = 64K, 1 = 128K, 2 = 256K
318 */
319 static int fdir_pballoc = 1;
320 #endif
321
322 #ifdef DEV_NETMAP
323 /*
324  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
325  * be a reference on how to implement netmap support in a driver.
326  * Additional comments are in ixgbe_netmap.h .
327  *
328  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
329  * that extend the standard driver.
330  */
331 #include <dev/netmap/ixgbe_netmap.h>
332 #endif /* DEV_NETMAP */
333
334 /*********************************************************************
335  *  Device identification routine
336  *
337  *  ixgbe_probe determines if the driver should be loaded on
338  *  an adapter based on the PCI vendor/device id of the adapter.
339  *
340  *  return BUS_PROBE_DEFAULT on success, positive on failure
341  *********************************************************************/
342
343 static int
344 ixgbe_probe(device_t dev)
345 {
346         ixgbe_vendor_info_t *ent;
347
348         u16     pci_vendor_id = 0;
349         u16     pci_device_id = 0;
350         u16     pci_subvendor_id = 0;
351         u16     pci_subdevice_id = 0;
352         char    adapter_name[256];
353
354         INIT_DEBUGOUT("ixgbe_probe: begin");
355
356         pci_vendor_id = pci_get_vendor(dev);
357         if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
358                 return (ENXIO);
359
360         pci_device_id = pci_get_device(dev);
361         pci_subvendor_id = pci_get_subvendor(dev);
362         pci_subdevice_id = pci_get_subdevice(dev);
363
364         ent = ixgbe_vendor_info_array;
365         while (ent->vendor_id != 0) {
366                 if ((pci_vendor_id == ent->vendor_id) &&
367                     (pci_device_id == ent->device_id) &&
368
369                     ((pci_subvendor_id == ent->subvendor_id) ||
370                      (ent->subvendor_id == 0)) &&
371
372                     ((pci_subdevice_id == ent->subdevice_id) ||
373                      (ent->subdevice_id == 0))) {
374                         ksprintf(adapter_name, "%s, Version - %s",
375                                 ixgbe_strings[ent->index],
376                                 ixgbe_driver_version);
377                         device_set_desc_copy(dev, adapter_name);
378                         ++ixgbe_total_ports;
379                         return (BUS_PROBE_DEFAULT);
380                 }
381                 ent++;
382         }
383         return (ENXIO);
384 }
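/*
 * Note that a table row with a subvendor or subdevice id of 0 is
 * treated as a wildcard by the matching loop above, so most entries
 * only need to match on the vendor and device ids.
 */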
385
386 /*********************************************************************
387  *  Device initialization routine
388  *
389  *  The attach entry point is called when the driver is being loaded.
390  *  This routine identifies the type of hardware, allocates all resources
391  *  and initializes the hardware.
392  *
393  *  return 0 on success, positive on failure
394  *********************************************************************/
395
396 static int
397 ixgbe_attach(device_t dev)
398 {
399         struct adapter *adapter;
400         struct ixgbe_hw *hw;
401         int             error = 0;
402         u16             csum;
403         u32             ctrl_ext;
404
405         INIT_DEBUGOUT("ixgbe_attach: begin");
406
407         if (resource_disabled("ixgbe", device_get_unit(dev))) {
408                 device_printf(dev, "Disabled by device hint\n");
409                 return (ENXIO);
410         }
411
412         /* Allocate, clear, and link in our adapter structure */
413         adapter = device_get_softc(dev);
414         adapter->dev = adapter->osdep.dev = dev;
415         hw = &adapter->hw;
416
417         /* Core Lock Init */
418         IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
419
420         /* SYSCTL APIs */
421
422         sysctl_ctx_init(&adapter->sysctl_ctx);
423         adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
424             SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
425             device_get_nameunit(adapter->dev), CTLFLAG_RD, 0, "");
426         if (adapter->sysctl_tree == NULL) {
427                 device_printf(adapter->dev, "can't add sysctl node\n");
428                 return (EINVAL);
429         }
430         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
431                         SYSCTL_CHILDREN(adapter->sysctl_tree),
432                         OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
433                         adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");
434
435         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
436                         SYSCTL_CHILDREN(adapter->sysctl_tree),
437                         OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
438                         &ixgbe_enable_aim, 1, "Interrupt Moderation");
439
440         /*
441         ** Allow a kind of speed control by forcing the autoneg
442         ** advertised speed list to only a certain value; this
443         ** supports 1G on 82599 devices, and 100Mb on X540.
444         */
445         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
446                         SYSCTL_CHILDREN(adapter->sysctl_tree),
447                         OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
448                         adapter, 0, ixgbe_set_advertise, "I", "Link Speed");
449
450         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
451                         SYSCTL_CHILDREN(adapter->sysctl_tree),
452                         OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
453                         0, ixgbe_set_thermal_test, "I", "Thermal Test");
454
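        /*
         * The sysctl nodes created above hang off hw.<nameunit>, so once
         * the device attaches they can be read or changed with sysctl(8);
         * an illustrative sketch (assuming the first ix device) is:
         *
         *	sysctl hw.ix0.fc=3
         *	sysctl hw.ix0.advertise_speed=1
         */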
455         /* Set up the timer callout */
456         callout_init_mp(&adapter->timer);
457
458         /* Determine hardware revision */
459         ixgbe_identify_hardware(adapter);
460
461         /* Do base PCI setup - map BAR0 */
462         if (ixgbe_allocate_pci_resources(adapter)) {
463                 device_printf(dev, "Allocation of PCI resources failed\n");
464                 error = ENXIO;
465                 goto err_out;
466         }
467
468         /* Do descriptor calc and sanity checks */
469         if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
470             ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
471                 device_printf(dev, "TXD config issue, using default!\n");
472                 adapter->num_tx_desc = DEFAULT_TXD;
473         } else
474                 adapter->num_tx_desc = ixgbe_txd;
475
476         /*
477         ** With many RX rings it is easy to exceed the
478         ** system mbuf allocation. Tuning nmbclusters
479         ** can alleviate this.
480         */
481         if (nmbclusters > 0 ) {
482                 int s;
483                 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
484                 if (s > nmbclusters) {
485                         device_printf(dev, "RX Descriptors exceed "
486                             "system mbuf max, using default instead!\n");
487                         ixgbe_rxd = DEFAULT_RXD;
488                 }
489         }
490
491         if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
492             ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
493                 device_printf(dev, "RXD config issue, using default!\n");
494                 adapter->num_rx_desc = DEFAULT_RXD;
495         } else
496                 adapter->num_rx_desc = ixgbe_rxd;
497
498         /* Allocate our TX/RX Queues */
499         if (ixgbe_allocate_queues(adapter)) {
500                 error = ENOMEM;
501                 goto err_out;
502         }
503
504         /* Allocate multicast array memory. */
505         adapter->mta = kmalloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
506             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
507         if (adapter->mta == NULL) {
508                 device_printf(dev, "Can not allocate multicast setup array\n");
509                 error = ENOMEM;
510                 goto err_late;
511         }
512
513         /* Initialize the shared code */
514         error = ixgbe_init_shared_code(hw);
515         if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
516                 /*
517                 ** No optics in this port, set up
518                 ** so the timer routine will probe 
519                 ** for later insertion.
520                 */
521                 adapter->sfp_probe = TRUE;
522                 error = 0;
523         } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
524                 device_printf(dev,"Unsupported SFP+ module detected!\n");
525                 error = EIO;
526                 goto err_late;
527         } else if (error) {
528                 device_printf(dev,"Unable to initialize the shared code\n");
529                 error = EIO;
530                 goto err_late;
531         }
532
533         /* Make sure we have a good EEPROM before we read from it */
534         if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
535                 device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
536                 error = EIO;
537                 goto err_late;
538         }
539
540         error = ixgbe_init_hw(hw);
541         switch (error) {
542         case IXGBE_ERR_EEPROM_VERSION:
543                 device_printf(dev, "This device is a pre-production adapter/"
544                     "LOM.  Please be aware there may be issues associated "
545                     "with your hardware.\n If you are experiencing problems "
546                     "please contact your Intel or hardware representative "
547                     "who provided you with this hardware.\n");
548                 break;
549         case IXGBE_ERR_SFP_NOT_SUPPORTED:
550                 device_printf(dev,"Unsupported SFP+ Module\n");
551                 error = EIO;
552                 device_printf(dev,"Hardware Initialization Failure\n");
553                 goto err_late;
554         case IXGBE_ERR_SFP_NOT_PRESENT:
555                 device_printf(dev,"No SFP+ Module found\n");
556                 /* falls thru */
557         default:
558                 break;
559         }
560
561         /* Detect and set physical type */
562         ixgbe_setup_optics(adapter);
563
564         if ((adapter->msix > 1) && (ixgbe_enable_msix))
565                 error = ixgbe_allocate_msix(adapter); 
566         else
567                 error = ixgbe_allocate_legacy(adapter); 
568         if (error) 
569                 goto err_late;
570
571         /* Setup OS specific network interface */
572         if (ixgbe_setup_interface(dev, adapter) != 0)
573                 goto err_late;
574
575         /* Sysctl for limiting the amount of work done in the taskqueue */
576         ixgbe_add_rx_process_limit(adapter, "rx_processing_limit",
577             "max number of rx packets to process", &adapter->rx_process_limit,
578             ixgbe_rx_process_limit);
579
580         /* Initialize statistics */
581         ixgbe_update_stats_counters(adapter);
582
583         /* Register for VLAN events */
584         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
585             ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
586         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
587             ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
588
589         /* Print PCIE bus type/speed/width info */
590         ixgbe_get_bus_info(hw);
591         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
592             ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
593             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
594             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
595             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
596             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
597             ("Unknown"));
598
599         if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
600             (hw->bus.speed == ixgbe_bus_speed_2500)) {
601                 device_printf(dev, "PCI-Express bandwidth available"
602                     " for this card\n     is not sufficient for"
603                     " optimal performance.\n");
604                 device_printf(dev, "For optimal performance a x8 "
605                     "PCIE, or x4 PCIE 2 slot is required.\n");
606         }
607
608         /* let hardware know driver is loaded */
609         ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
610         ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
611         IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
612
613         ixgbe_add_hw_stats(adapter);
614
615 #ifdef DEV_NETMAP
616         ixgbe_netmap_attach(adapter);
617 #endif /* DEV_NETMAP */
618         INIT_DEBUGOUT("ixgbe_attach: end");
619         return (0);
620 err_late:
621         ixgbe_free_transmit_structures(adapter);
622         ixgbe_free_receive_structures(adapter);
623 err_out:
624         if (adapter->ifp != NULL)
625                 if_free(adapter->ifp);
626         ixgbe_free_pci_resources(adapter);
627         kfree(adapter->mta, M_DEVBUF);
628         return (error);
629
630 }
631
632 /*********************************************************************
633  *  Device removal routine
634  *
635  *  The detach entry point is called when the driver is being removed.
636  *  This routine stops the adapter and deallocates all the resources
637  *  that were allocated for driver operation.
638  *
639  *  return 0 on success, positive on failure
640  *********************************************************************/
641
642 static int
643 ixgbe_detach(device_t dev)
644 {
645         struct adapter *adapter = device_get_softc(dev);
646         struct ix_queue *que = adapter->queues;
647         u32     ctrl_ext;
648
649         INIT_DEBUGOUT("ixgbe_detach: begin");
650
651         /* Make sure VLANS are not using driver */
652         if (adapter->ifp->if_vlantrunks != NULL) {
653                 device_printf(dev,"Vlan in use, detach first\n");
654                 return (EBUSY);
655         }
656
657         IXGBE_CORE_LOCK(adapter);
658         ixgbe_stop(adapter);
659         IXGBE_CORE_UNLOCK(adapter);
660
661         for (int i = 0; i < adapter->num_queues; i++, que++) {
662                 if (que->tq) {
663                         taskqueue_drain(que->tq, &que->que_task);
664                         taskqueue_free(que->tq);
665                 }
666         }
667
668         /* Drain the Link queue */
669         if (adapter->tq) {
670                 taskqueue_drain(adapter->tq, &adapter->link_task);
671                 taskqueue_drain(adapter->tq, &adapter->mod_task);
672                 taskqueue_drain(adapter->tq, &adapter->msf_task);
673 #ifdef IXGBE_FDIR
674                 taskqueue_drain(adapter->tq, &adapter->fdir_task);
675 #endif
676                 taskqueue_free(adapter->tq);
677         }
678
679         /* let hardware know driver is unloading */
680         ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
681         ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
682         IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
683
684         /* Unregister VLAN events */
685         if (adapter->vlan_attach != NULL)
686                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
687         if (adapter->vlan_detach != NULL)
688                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
689
690         ether_ifdetach(adapter->ifp);
691         callout_stop(&adapter->timer);
692 #ifdef DEV_NETMAP
693         netmap_detach(adapter->ifp);
694 #endif /* DEV_NETMAP */
695         ixgbe_free_pci_resources(adapter);
696         bus_generic_detach(dev);
697         if_free(adapter->ifp);
698
699         ixgbe_free_transmit_structures(adapter);
700         ixgbe_free_receive_structures(adapter);
701         kfree(adapter->mta, M_DEVBUF);
702         sysctl_ctx_free(&adapter->sysctl_ctx);
703         
704         IXGBE_CORE_LOCK_DESTROY(adapter);
705         return (0);
706 }
707
708 /*********************************************************************
709  *
710  *  Shutdown entry point
711  *
712  **********************************************************************/
713
714 static int
715 ixgbe_shutdown(device_t dev)
716 {
717         struct adapter *adapter = device_get_softc(dev);
718         IXGBE_CORE_LOCK(adapter);
719         ixgbe_stop(adapter);
720         IXGBE_CORE_UNLOCK(adapter);
721         return (0);
722 }
723
724
725 /*********************************************************************
726  *  Transmit entry point
727  *
728  *  ixgbe_start is called by the stack to initiate a transmit.
729  *  The driver will remain in this routine as long as there are
730  *  packets to transmit and transmit resources are available.
731  *  In case resources are not available stack is notified and
732  *  In case resources are not available, the stack is notified and
733  **********************************************************************/
734
735 static void
736 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
737 {
738         struct mbuf    *m_head;
739         struct adapter *adapter = txr->adapter;
740
741         IXGBE_TX_LOCK_ASSERT(txr);
742
743         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
744                 return;
745         if (!adapter->link_active)
746                 return;
747
748         while (!ifq_is_empty(&ifp->if_snd)) {
749                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) {
750                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
751                         break;
752                 }
753
754                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
755                 if (m_head == NULL)
756                         break;
757
758                 if (ixgbe_xmit(txr, &m_head)) {
759 #if 0 /* XXX: prepend to an ALTQ queue ? */
760                         if (m_head != NULL)
761                                 IF_PREPEND(&ifp->if_snd, m_head);
762 #endif
763                         if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
764                                 txr->queue_status |= IXGBE_QUEUE_DEPLETED;
765                         break;
766                 }
767                 /* Send a copy of the frame to the BPF listener */
768                 ETHER_BPF_MTAP(ifp, m_head);
769
770                 /* Set watchdog on */
771                 txr->watchdog_time = ticks;
772                 txr->queue_status = IXGBE_QUEUE_WORKING;
773
774         }
775         return;
776 }
777
778 /*
779  * Legacy TX start - called by the stack; this
780  * always uses the first tx ring, and should
781  * not be used with multiqueue tx enabled.
782  */
783 static void
784 ixgbe_start(struct ifnet *ifp)
785 {
786         struct adapter *adapter = ifp->if_softc;
787         struct tx_ring  *txr = adapter->tx_rings;
788
789         if (ifp->if_flags & IFF_RUNNING) {
790                 IXGBE_TX_LOCK(txr);
791                 ixgbe_start_locked(txr, ifp);
792                 IXGBE_TX_UNLOCK(txr);
793         }
794         return;
795 }
796
797 #if 0 /* __FreeBSD_version >= 800000 */
798 /*
799 ** Multiqueue Transmit driver
800 **
801 */
802 static int
803 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
804 {
805         struct adapter  *adapter = ifp->if_softc;
806         struct ix_queue *que;
807         struct tx_ring  *txr;
808         int             i = 0, err = 0;
809
810         /* Which queue to use */
811         if ((m->m_flags & M_FLOWID) != 0)
812                 i = m->m_pkthdr.flowid % adapter->num_queues;
813         else
814                 i = curcpu % adapter->num_queues;
815
816         txr = &adapter->tx_rings[i];
817         que = &adapter->queues[i];
818
819         if (((txr->queue_status & IXGBE_QUEUE_DEPLETED) == 0) &&
820             IXGBE_TX_TRYLOCK(txr)) {
821                 err = ixgbe_mq_start_locked(ifp, txr, m);
822                 IXGBE_TX_UNLOCK(txr);
823         } else {
824                 err = drbr_enqueue(ifp, txr->br, m);
825                 taskqueue_enqueue(que->tq, &que->que_task);
826         }
827
828         return (err);
829 }
830
831 static int
832 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
833 {
834         struct adapter  *adapter = txr->adapter;
835         struct mbuf     *next;
836         int             enqueued, err = 0;
837
838         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
839             (txr->queue_status == IXGBE_QUEUE_DEPLETED) ||
840             adapter->link_active == 0) {
841                 if (m != NULL)
842                         err = drbr_enqueue(ifp, txr->br, m);
843                 return (err);
844         }
845
846         enqueued = 0;
847         if (m == NULL) {
848                 next = drbr_dequeue(ifp, txr->br);
849         } else if (drbr_needs_enqueue(ifp, txr->br)) {
850                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
851                         return (err);
852                 next = drbr_dequeue(ifp, txr->br);
853         } else
854                 next = m;
855
856         /* Process the queue */
857         while (next != NULL) {
858                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
859                         if (next != NULL)
860                                 err = drbr_enqueue(ifp, txr->br, next);
861                         break;
862                 }
863                 enqueued++;
864                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
865                 /* Send a copy of the frame to the BPF listener */
866                 ETHER_BPF_MTAP(ifp, next);
867                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
868                         break;
869                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
870                         ixgbe_txeof(txr);
871                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD) {
872                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
873                         break;
874                 }
875                 next = drbr_dequeue(ifp, txr->br);
876         }
877
878         if (enqueued > 0) {
879                 /* Set watchdog on */
880                 txr->queue_status |= IXGBE_QUEUE_WORKING;
881                 txr->watchdog_time = ticks;
882         }
883
884         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
885                 ixgbe_txeof(txr);
886
887         return (err);
888 }
889
890 /*
891 ** Flush all ring buffers
892 */
893 static void
894 ixgbe_qflush(struct ifnet *ifp)
895 {
896         struct adapter  *adapter = ifp->if_softc;
897         struct tx_ring  *txr = adapter->tx_rings;
898         struct mbuf     *m;
899
900         for (int i = 0; i < adapter->num_queues; i++, txr++) {
901                 IXGBE_TX_LOCK(txr);
902                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
903                         m_freem(m);
904                 IXGBE_TX_UNLOCK(txr);
905         }
906         if_qflush(ifp);
907 }
908 #endif /* __FreeBSD_version >= 800000 */
909
910 /*********************************************************************
911  *  Ioctl entry point
912  *
913  *  ixgbe_ioctl is called when the user wants to configure the
914  *  interface.
915  *
916  *  return 0 on success, positive on failure
917  **********************************************************************/
918
919 static int
920 ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
921 {
922         struct adapter  *adapter = ifp->if_softc;
923         struct ifreq    *ifr = (struct ifreq *) data;
924 #if defined(INET) || defined(INET6)
925         struct ifaddr *ifa = (struct ifaddr *)data;
926         bool            avoid_reset = FALSE;
927 #endif
928         int             error = 0;
929
930         switch (command) {
931
932         case SIOCSIFADDR:
933 #ifdef INET
934                 if (ifa->ifa_addr->sa_family == AF_INET)
935                         avoid_reset = TRUE;
936 #endif
937 #ifdef INET6
938                 if (ifa->ifa_addr->sa_family == AF_INET6)
939                         avoid_reset = TRUE;
940 #endif
941 #if defined(INET) || defined(INET6)
942                 /*
943                 ** Calling init results in link renegotiation,
944                 ** so we avoid doing it when possible.
945                 */
946                 if (avoid_reset) {
947                         ifp->if_flags |= IFF_UP;
948                         if (!(ifp->if_flags & IFF_RUNNING))
949                                 ixgbe_init(adapter);
950                         if (!(ifp->if_flags & IFF_NOARP))
951                                 arp_ifinit(ifp, ifa);
952                 } else
953                         error = ether_ioctl(ifp, command, data);
954 #endif
955                 break;
956         case SIOCSIFMTU:
957                 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
958                 if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
959                         error = EINVAL;
960                 } else {
961                         IXGBE_CORE_LOCK(adapter);
962                         ifp->if_mtu = ifr->ifr_mtu;
963                         adapter->max_frame_size =
964                                 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
965                         ixgbe_init_locked(adapter);
966                         IXGBE_CORE_UNLOCK(adapter);
967                 }
968                 break;
969         case SIOCSIFFLAGS:
970                 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
971                 IXGBE_CORE_LOCK(adapter);
972                 if (ifp->if_flags & IFF_UP) {
973                         if ((ifp->if_flags & IFF_RUNNING)) {
974                                 if ((ifp->if_flags ^ adapter->if_flags) &
975                                     (IFF_PROMISC | IFF_ALLMULTI)) {
976                                         ixgbe_set_promisc(adapter);
977                                 }
978                         } else
979                                 ixgbe_init_locked(adapter);
980                 } else
981                         if (ifp->if_flags & IFF_RUNNING)
982                                 ixgbe_stop(adapter);
983                 adapter->if_flags = ifp->if_flags;
984                 IXGBE_CORE_UNLOCK(adapter);
985                 break;
986         case SIOCADDMULTI:
987         case SIOCDELMULTI:
988                 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
989                 if (ifp->if_flags & IFF_RUNNING) {
990                         IXGBE_CORE_LOCK(adapter);
991                         ixgbe_disable_intr(adapter);
992                         ixgbe_set_multi(adapter);
993                         ixgbe_enable_intr(adapter);
994                         IXGBE_CORE_UNLOCK(adapter);
995                 }
996                 break;
997         case SIOCSIFMEDIA:
998         case SIOCGIFMEDIA:
999                 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1000                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1001                 break;
1002         case SIOCSIFCAP:
1003         {
1004                 int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1005                 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1006                 if (mask & IFCAP_HWCSUM)
1007                         ifp->if_capenable ^= IFCAP_HWCSUM;
1008                 if (mask & IFCAP_TSO4)
1009                         ifp->if_capenable ^= IFCAP_TSO4;
1010                 if (mask & IFCAP_TSO6)
1011                         ifp->if_capenable ^= IFCAP_TSO6;
1012 #if 0 /* NET_LRO */
1013                 if (mask & IFCAP_LRO)
1014                         ifp->if_capenable ^= IFCAP_LRO;
1015 #endif
1016                 if (mask & IFCAP_VLAN_HWTAGGING)
1017                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1018                 if (mask & IFCAP_VLAN_HWFILTER)
1019                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1020 #if 0 /* NET_TSO */
1021                 if (mask & IFCAP_VLAN_HWTSO)
1022                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1023 #endif
1024                 if (ifp->if_flags & IFF_RUNNING) {
1025                         IXGBE_CORE_LOCK(adapter);
1026                         ixgbe_init_locked(adapter);
1027                         IXGBE_CORE_UNLOCK(adapter);
1028                 }
1029 #if 0
1030                 VLAN_CAPABILITIES(ifp);
1031 #endif
1032                 break;
1033         }
1034
1035         default:
1036                 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1037                 error = ether_ioctl(ifp, command, data);
1038                 break;
1039         }
1040
1041         return (error);
1042 }
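/*
 * The SIOCSIFCAP handling above XORs the requested capability mask with
 * the currently enabled one, so only the bits that actually changed are
 * toggled.  From userland this is normally driven by ifconfig(8); an
 * illustrative sketch is:
 *
 *	ifconfig ix0 -tso
 *	ifconfig ix0 rxcsum txcsum
 */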
1043
1044 /*********************************************************************
1045  *  Init entry point
1046  *
1047  *  This routine is used in two ways. It is used by the stack as
1048  *  the init entry point in the network interface structure. It is also used
1049  *  by the driver as a hw/sw initialization routine to get to a
1050  *  consistent state.
1051  *
1052  *  return 0 on success, positive on failure
1053  **********************************************************************/
1054 #define IXGBE_MHADD_MFS_SHIFT 16
1055
1056 static void
1057 ixgbe_init_locked(struct adapter *adapter)
1058 {
1059         struct ifnet   *ifp = adapter->ifp;
1060         device_t        dev = adapter->dev;
1061         struct ixgbe_hw *hw = &adapter->hw;
1062         u32             k, txdctl, mhadd, gpie;
1063         u32             rxdctl, rxctrl;
1064
1065         KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
1066         INIT_DEBUGOUT("ixgbe_init: begin");
1067         hw->adapter_stopped = FALSE;
1068         ixgbe_stop_adapter(hw);
1069         callout_stop(&adapter->timer);
1070
1071         /* reprogram the RAR[0] in case user changed it. */
1072         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1073
1074         /* Get the latest mac address, User can use a LAA */
1075         bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
1076               IXGBE_ETH_LENGTH_OF_ADDRESS);
1077         ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1078         hw->addr_ctrl.rar_used_count = 1;
1079
1080         /* Set the various hardware offload abilities */
1081         ifp->if_hwassist = 0;
1082         if (ifp->if_capenable & IFCAP_TSO)
1083                 ifp->if_hwassist |= CSUM_TSO;
1084         if (ifp->if_capenable & IFCAP_TXCSUM) {
1085                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1086 #if 0
1087                 if (hw->mac.type != ixgbe_mac_82598EB)
1088                         ifp->if_hwassist |= CSUM_SCTP;
1089 #endif
1090         }
1091
1092         /* Prepare transmit descriptors and buffers */
1093         if (ixgbe_setup_transmit_structures(adapter)) {
1094                 device_printf(dev,"Could not setup transmit structures\n");
1095                 ixgbe_stop(adapter);
1096                 return;
1097         }
1098
1099         ixgbe_init_hw(hw);
1100         ixgbe_initialize_transmit_units(adapter);
1101
1102         /* Setup Multicast table */
1103         ixgbe_set_multi(adapter);
1104
1105         /*
1106         ** Determine the correct mbuf pool
1107         ** for doing jumbo/headersplit
1108         */
1109         if (adapter->max_frame_size <= 2048)
1110                 adapter->rx_mbuf_sz = MCLBYTES;
1111         else if (adapter->max_frame_size <= 4096)
1112                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1113         else if (adapter->max_frame_size <= 9216)
1114                 adapter->rx_mbuf_sz = MJUM9BYTES;
1115         else
1116                 adapter->rx_mbuf_sz = MJUM16BYTES;
1117
1118         /* Prepare receive descriptors and buffers */
1119         if (ixgbe_setup_receive_structures(adapter)) {
1120                 device_printf(dev,"Could not setup receive structures\n");
1121                 ixgbe_stop(adapter);
1122                 return;
1123         }
1124
1125         /* Configure RX settings */
1126         ixgbe_initialize_receive_units(adapter);
1127
1128         gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1129
1130         /* Enable Fan Failure Interrupt */
1131         gpie |= IXGBE_SDP1_GPIEN;
1132
1133         /* Add for Module detection */
1134         if (hw->mac.type == ixgbe_mac_82599EB)
1135                 gpie |= IXGBE_SDP2_GPIEN;
1136
1137         /* Thermal Failure Detection */
1138         if (hw->mac.type == ixgbe_mac_X540)
1139                 gpie |= IXGBE_SDP0_GPIEN;
1140
1141         if (adapter->msix > 1) {
1142                 /* Enable Enhanced MSIX mode */
1143                 gpie |= IXGBE_GPIE_MSIX_MODE;
1144                 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1145                     IXGBE_GPIE_OCD;
1146         }
1147         IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1148
1149         /* Set MTU size */
1150         if (ifp->if_mtu > ETHERMTU) {
1151                 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1152                 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1153                 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1154                 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1155         }
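        /*
         * The maximum frame size occupies the upper 16 bits of MHADD
         * (hence IXGBE_MHADD_MFS_SHIFT of 16).  As a worked example, a
         * 9000-byte MTU yields a max_frame_size of 9018 bytes
         * (MTU + Ethernet header + CRC), which is the value programmed
         * into MHADD[31:16].
         */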
1156         
1157         /* Now enable all the queues */
1158
1159         for (int i = 0; i < adapter->num_queues; i++) {
1160                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1161                 txdctl |= IXGBE_TXDCTL_ENABLE;
1162                 /* Set WTHRESH to 8, burst writeback */
1163                 txdctl |= (8 << 16);
1164                 /*
1165                  * When the internal queue falls below PTHRESH (32),
1166                  * start prefetching as long as there are at least
1167                  * HTHRESH (1) buffers ready. The values are taken
1168                  * from the Intel linux driver 3.8.21.
1169                  * Prefetching enables tx line rate even with 1 queue.
1170                  */
1171                 txdctl |= (32 << 0) | (1 << 8);
1172                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1173         }
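        /*
         * With the values above, each TXDCTL register carries
         * PTHRESH=32, HTHRESH=1 and WTHRESH=8 at bit offsets 0, 8 and
         * 16 respectively, i.e. (32 << 0) | (1 << 8) | (8 << 16) =
         * 0x080120, plus the enable bit.
         */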
1174
1175         for (int i = 0; i < adapter->num_queues; i++) {
1176                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1177                 if (hw->mac.type == ixgbe_mac_82598EB) {
1178                         /*
1179                         ** PTHRESH = 21
1180                         ** HTHRESH = 4
1181                         ** WTHRESH = 8
1182                         */
1183                         rxdctl &= ~0x3FFFFF;
1184                         rxdctl |= 0x080420;
1185                 }
1186                 rxdctl |= IXGBE_RXDCTL_ENABLE;
1187                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1188                 for (k = 0; k < 10; k++) {
1189                         if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1190                             IXGBE_RXDCTL_ENABLE)
1191                                 break;
1192                         else
1193                                 msec_delay(1);
1194                 }
1195                 wmb();
1196 #ifdef DEV_NETMAP
1197                 /*
1198                  * In netmap mode, we must preserve the buffers made
1199                  * available to userspace before the if_init()
1200                  * (this is true by default on the TX side, because
1201                  * init makes all buffers available to userspace).
1202                  *
1203                  * netmap_reset() and the device specific routines
1204                  * (e.g. ixgbe_setup_receive_rings()) map these
1205                  * buffers at the end of the NIC ring, so here we
1206                  * must set the RDT (tail) register to make sure
1207                  * they are not overwritten.
1208                  *
1209                  * In this driver the NIC ring starts at RDH = 0,
1210                  * RDT points to the last slot available for reception (?),
1211                  * so RDT = num_rx_desc - 1 means the whole ring is available.
1212                  */
1213                 if (ifp->if_capenable & IFCAP_NETMAP) {
1214                         struct netmap_adapter *na = NA(adapter->ifp);
1215                         struct netmap_kring *kring = &na->rx_rings[i];
1216                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
1217
1218                         IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1219                 } else
1220 #endif /* DEV_NETMAP */
1221                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1222         }
1223
1224         /* Set up VLAN support and filter */
1225         ixgbe_setup_vlan_hw_support(adapter);
1226
1227         /* Enable Receive engine */
1228         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1229         if (hw->mac.type == ixgbe_mac_82598EB)
1230                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1231         rxctrl |= IXGBE_RXCTRL_RXEN;
1232         ixgbe_enable_rx_dma(hw, rxctrl);
1233
1234         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1235
1236         /* Set up MSI/X routing */
1237         if (ixgbe_enable_msix)  {
1238                 ixgbe_configure_ivars(adapter);
1239                 /* Set up auto-mask */
1240                 if (hw->mac.type == ixgbe_mac_82598EB)
1241                         IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1242                 else {
1243                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1244                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1245                 }
1246         } else {  /* Simple settings for Legacy/MSI */
1247                 ixgbe_set_ivar(adapter, 0, 0, 0);
1248                 ixgbe_set_ivar(adapter, 0, 0, 1);
1249                 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1250         }
1251
1252 #ifdef IXGBE_FDIR
1253         /* Init Flow director */
1254         if (hw->mac.type != ixgbe_mac_82598EB) {
1255                 u32 hdrm = 32 << fdir_pballoc;
1256
1257                 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1258                 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1259         }
1260 #endif
1261
1262         /*
1263         ** Check on any SFP devices that
1264         ** need to be kick-started
1265         */
1266         if (hw->phy.type == ixgbe_phy_none) {
1267                 int err = hw->phy.ops.identify(hw);
1268                 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1269                         device_printf(dev,
1270                             "Unsupported SFP+ module type was detected.\n");
1271                         return;
1272                 }
1273         }
1274
1275         /* Set moderation on the Link interrupt */
1276         IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1277
1278         /* Config/Enable Link */
1279         ixgbe_config_link(adapter);
1280
1281         /* Hardware Packet Buffer & Flow Control setup */
1282         {
1283                 u32 rxpb, frame, size, tmp;
1284
1285                 frame = adapter->max_frame_size;
1286
1287                 /* Calculate High Water */
1288                 if (hw->mac.type == ixgbe_mac_X540)
1289                         tmp = IXGBE_DV_X540(frame, frame);
1290                 else
1291                         tmp = IXGBE_DV(frame, frame);
1292                 size = IXGBE_BT2KB(tmp);
1293                 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1294                 hw->fc.high_water[0] = rxpb - size;
1295
1296                 /* Now calculate Low Water */
1297                 if (hw->mac.type == ixgbe_mac_X540)
1298                         tmp = IXGBE_LOW_DV_X540(frame);
1299                 else
1300                         tmp = IXGBE_LOW_DV(frame);
1301                 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1302                 
1303                 adapter->fc = hw->fc.requested_mode = ixgbe_fc_full;
1304                 hw->fc.pause_time = IXGBE_FC_PAUSE;
1305                 hw->fc.send_xon = TRUE;
1306         }
1307         /* Initialize the FC settings */
1308         ixgbe_start_hw(hw);
1309
1310         /* And now turn on interrupts */
1311         ixgbe_enable_intr(adapter);
1312
1313         /* Now inform the stack we're ready */
1314         ifp->if_flags |= IFF_RUNNING;
1315         ifp->if_flags &= ~IFF_OACTIVE;
1316
1317         return;
1318 }
1319
1320 static void
1321 ixgbe_init(void *arg)
1322 {
1323         struct adapter *adapter = arg;
1324
1325         IXGBE_CORE_LOCK(adapter);
1326         ixgbe_init_locked(adapter);
1327         IXGBE_CORE_UNLOCK(adapter);
1328         return;
1329 }
1330
1331
1332 /*
1333 **
1334 ** MSIX Interrupt Handlers and Tasklets
1335 **
1336 */
1337
1338 static inline void
1339 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1340 {
1341         struct ixgbe_hw *hw = &adapter->hw;
1342         u64     queue = (u64)1 << vector;
1343         u32     mask;
1344
1345         if (hw->mac.type == ixgbe_mac_82598EB) {
1346                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1347                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1348         } else {
1349                 mask = (queue & 0xFFFFFFFF);
1350                 if (mask)
1351                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1352                 mask = (queue >> 32);
1353                 if (mask)
1354                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1355         }
1356 }
1357
1358 static inline void
1359 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1360 {
1361         struct ixgbe_hw *hw = &adapter->hw;
1362         u64     queue = (u64)1 << vector;
1363         u32     mask;
1364
1365         if (hw->mac.type == ixgbe_mac_82598EB) {
1366                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1367                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1368         } else {
1369                 mask = (queue & 0xFFFFFFFF);
1370                 if (mask)
1371                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1372                 mask = (queue >> 32);
1373                 if (mask)
1374                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1375         }
1376 }
1377
1378 static inline void
1379 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1380 {
1381         u32 mask;
1382
1383         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1384                 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1385                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1386         } else {
1387                 mask = (queues & 0xFFFFFFFF);
1388                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1389                 mask = (queues >> 32);
1390                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1391         }
1392 }
1393
1394
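     /*
     ** Deferred (taskqueue) handler for a queue interrupt: drain RX
     ** then TX, kick the stack's send queue if it has work pending,
     ** requeue ourselves while more RX work remains, and only
     ** re-enable the queue interrupt once the backlog is cleared.
     */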
1395 static void
1396 ixgbe_handle_que(void *context, int pending)
1397 {
1398         struct ix_queue *que = context;
1399         struct adapter  *adapter = que->adapter;
1400         struct tx_ring  *txr = que->txr;
1401         struct ifnet    *ifp = adapter->ifp;
1402         bool            more;
1403
1404         if (ifp->if_flags & IFF_RUNNING) {
1405                 more = ixgbe_rxeof(que, adapter->rx_process_limit);
1406                 IXGBE_TX_LOCK(txr);
1407                 ixgbe_txeof(txr);
1408 #if 0 /*__FreeBSD_version >= 800000*/
1409                 if (!drbr_empty(ifp, txr->br))
1410                         ixgbe_mq_start_locked(ifp, txr, NULL);
1411 #else
1412                 if (!ifq_is_empty(&ifp->if_snd))
1413                         ixgbe_start_locked(txr, ifp);
1414 #endif
1415                 IXGBE_TX_UNLOCK(txr);
1416                 if (more) {
1417                         taskqueue_enqueue(que->tq, &que->que_task);
1418                         return;
1419                 }
1420         }
1421
1422         /* Reenable this interrupt */
1423         ixgbe_enable_queue(adapter, que->msix);
1424         return;
1425 }
1426
1427
1428 /*********************************************************************
1429  *
1430  *  Legacy Interrupt Service routine
1431  *
1432  **********************************************************************/
1433
1434 static void
1435 ixgbe_legacy_irq(void *arg)
1436 {
1437         struct ix_queue *que = arg;
1438         struct adapter  *adapter = que->adapter;
1439         struct ixgbe_hw *hw = &adapter->hw;
1440         struct          tx_ring *txr = adapter->tx_rings;
1441         bool            more_tx, more_rx;
1442         u32             reg_eicr, loop = MAX_LOOP;
1443
1444
1445         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1446
1447         ++que->irqs;
1448         if (reg_eicr == 0) {
1449                 ixgbe_enable_intr(adapter);
1450                 return;
1451         }
1452
1453         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
1454
1455         IXGBE_TX_LOCK(txr);
1456         do {
1457                 more_tx = ixgbe_txeof(txr);
1458         } while (loop-- && more_tx);
1459         IXGBE_TX_UNLOCK(txr);
1460
1461         if (more_rx || more_tx)
1462                 taskqueue_enqueue(que->tq, &que->que_task);
1463
1464         /* Check for fan failure */
1465         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1466             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1467                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1468                     "REPLACE IMMEDIATELY!!\n");
1469                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1470         }
1471
1472         /* Link status change */
1473         if (reg_eicr & IXGBE_EICR_LSC)
1474                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1475
1476         ixgbe_enable_intr(adapter);
1477         return;
1478 }
1479
1480
1481 /*********************************************************************
1482  *
1483  *  MSIX Queue Interrupt Service routine
1484  *
1485  **********************************************************************/
1486 void
1487 ixgbe_msix_que(void *arg)
1488 {
1489         struct ix_queue *que = arg;
1490         struct adapter  *adapter = que->adapter;
1491         struct tx_ring  *txr = que->txr;
1492         struct rx_ring  *rxr = que->rxr;
1493         bool            more_tx, more_rx;
1494         u32             newitr = 0;
1495
1496         ixgbe_disable_queue(adapter, que->msix);
1497         ++que->irqs;
1498
1499         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
1500
1501         IXGBE_TX_LOCK(txr);
1502         more_tx = ixgbe_txeof(txr);
1503         /*
1504         ** Make certain that if the stack 
1505         ** has anything queued the task gets
1506         ** scheduled to handle it.
1507         */
1508 #if 0
1509 #if __FreeBSD_version < 800000
1510         if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
1511 #else
1512         if (!drbr_empty(adapter->ifp, txr->br))
1513 #endif
1514 #endif
1515         if (!ifq_is_empty(&adapter->ifp->if_snd))
1516                 more_tx = 1;
1517         IXGBE_TX_UNLOCK(txr);
1518
1519         /* Do AIM now? */
1520
1521         if (ixgbe_enable_aim == FALSE)
1522                 goto no_calc;
1523         /*
1524         ** Do Adaptive Interrupt Moderation:
1525         **  - Write out last calculated setting
1526         **  - Calculate based on average size over
1527         **    the last interval.
1528         */
1529         if (que->eitr_setting)
1530                 IXGBE_WRITE_REG(&adapter->hw,
1531                     IXGBE_EITR(que->msix), que->eitr_setting);
1532  
1533         que->eitr_setting = 0;
1534
1535         /* Idle, do nothing */
1536         if ((txr->bytes == 0) && (rxr->bytes == 0))
1537                 goto no_calc;
1538                                 
1539         if ((txr->bytes) && (txr->packets))
1540                 newitr = txr->bytes/txr->packets;
1541         if ((rxr->bytes) && (rxr->packets))
1542                 newitr = max(newitr,
1543                     (rxr->bytes / rxr->packets));
1544         newitr += 24; /* account for hardware frame, crc */
1545
1546         /* set an upper boundary */
1547         newitr = min(newitr, 3000);
1548
1549         /* Be nice to the mid range */
1550         if ((newitr > 300) && (newitr < 1200))
1551                 newitr = (newitr / 3);
1552         else
1553                 newitr = (newitr / 2);
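             /*
              * Illustration only: with an average of ~600 bytes per
              * packet, newitr = 600 + 24 = 624, which the mid-range
              * rule above reduces to 208 before it is saved as the
              * next EITR setting.
              */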
1554
1555         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1556                 newitr |= newitr << 16;
1557         else
1558                 newitr |= IXGBE_EITR_CNT_WDIS;
1559                  
1560         /* save for next interrupt */
1561         que->eitr_setting = newitr;
1562
1563         /* Reset state */
1564         txr->bytes = 0;
1565         txr->packets = 0;
1566         rxr->bytes = 0;
1567         rxr->packets = 0;
1568
1569 no_calc:
1570         if (more_tx || more_rx)
1571                 taskqueue_enqueue(que->tq, &que->que_task);
1572         else /* Reenable this interrupt */
1573                 ixgbe_enable_queue(adapter, que->msix);
1574         return;
1575 }
1576
1577
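     /*
     ** MSIX handler for the "other" (link) vector: link state
     ** changes, flow director reinit, ECC errors, SFP module
     ** events, fan failure and over-temp conditions.
     */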
1578 static void
1579 ixgbe_msix_link(void *arg)
1580 {
1581         struct adapter  *adapter = arg;
1582         struct ixgbe_hw *hw = &adapter->hw;
1583         u32             reg_eicr;
1584
1585         ++adapter->link_irq;
1586
1587         /* First get the cause */
1588         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1589         /* Clear interrupt with write */
1590         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1591
1592         /* Link status change */
1593         if (reg_eicr & IXGBE_EICR_LSC)
1594                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1595
1596         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1597 #ifdef IXGBE_FDIR
1598                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1599                         /* This is probably overkill :) */
1600                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1601                                 return;
1602                         /* Disable the interrupt */
1603                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1604                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1605                 } else
1606 #endif
1607                 if (reg_eicr & IXGBE_EICR_ECC) {
1608                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1609                             "Please Reboot!!\n");
1610                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1611                 } else
1612
1613                 if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1614                         /* Clear the interrupt */
1615                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1616                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1617                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1618                         /* Clear the interrupt */
1619                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1620                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1621                 }
1622         } 
1623
1624         /* Check for fan failure */
1625         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1626             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1627                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1628                     "REPLACE IMMEDIATELY!!\n");
1629                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1630         }
1631
1632         /* Check for over temp condition */
1633         if ((hw->mac.type == ixgbe_mac_X540) &&
1634             (reg_eicr & IXGBE_EICR_GPI_SDP0)) {
1635                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1636                     "PHY IS SHUT DOWN!!\n");
1637                 device_printf(adapter->dev, "System shutdown required\n");
1638                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0);
1639         }
1640
1641         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1642         return;
1643 }
1644
1645 /*********************************************************************
1646  *
1647  *  Media Ioctl callback
1648  *
1649  *  This routine is called whenever the user queries the status of
1650  *  the interface using ifconfig.
1651  *
1652  **********************************************************************/
1653 static void
1654 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1655 {
1656         struct adapter *adapter = ifp->if_softc;
1657
1658         INIT_DEBUGOUT("ixgbe_media_status: begin");
1659         IXGBE_CORE_LOCK(adapter);
1660         ixgbe_update_link_status(adapter);
1661
1662         ifmr->ifm_status = IFM_AVALID;
1663         ifmr->ifm_active = IFM_ETHER;
1664
1665         if (!adapter->link_active) {
1666                 IXGBE_CORE_UNLOCK(adapter);
1667                 return;
1668         }
1669
1670         ifmr->ifm_status |= IFM_ACTIVE;
1671
1672         switch (adapter->link_speed) {
1673                 case IXGBE_LINK_SPEED_100_FULL:
1674                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1675                         break;
1676                 case IXGBE_LINK_SPEED_1GB_FULL:
1677                         ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1678                         break;
1679                 case IXGBE_LINK_SPEED_10GB_FULL:
1680                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1681                         break;
1682         }
1683
1684         IXGBE_CORE_UNLOCK(adapter);
1685
1686         return;
1687 }
1688
1689 /*********************************************************************
1690  *
1691  *  Media Ioctl callback
1692  *
1693  *  This routine is called when the user changes speed/duplex using
1694  *  media/mediaopt option with ifconfig.
1695  *
1696  **********************************************************************/
1697 static int
1698 ixgbe_media_change(struct ifnet * ifp)
1699 {
1700         struct adapter *adapter = ifp->if_softc;
1701         struct ifmedia *ifm = &adapter->media;
1702
1703         INIT_DEBUGOUT("ixgbe_media_change: begin");
1704
1705         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1706                 return (EINVAL);
1707
1708         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1709         case IFM_AUTO:
1710                 adapter->hw.phy.autoneg_advertised =
1711                     IXGBE_LINK_SPEED_100_FULL |
1712                     IXGBE_LINK_SPEED_1GB_FULL |
1713                     IXGBE_LINK_SPEED_10GB_FULL;
1714                 break;
1715         default:
1716                 device_printf(adapter->dev, "Only auto media type\n");
1717                 return (EINVAL);
1718         }
1719
1720         return (0);
1721 }
1722
1723 /*********************************************************************
1724  *
1725  *  This routine maps the mbufs to tx descriptors, allowing the
1726  *  TX engine to transmit the packets. 
1727  *      - return 0 on success, positive on failure
1728  *
1729  **********************************************************************/
1730
1731 static int
1732 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1733 {
1734         struct adapter  *adapter = txr->adapter;
1735         u32             olinfo_status = 0, cmd_type_len;
1736         u32             paylen = 0;
1737         int             i, j, error, nsegs, maxsegs;
1738         int             first, last = 0;
1739         struct mbuf     *m_head;
1740         bus_dma_segment_t segs[adapter->num_segs];
1741         bus_dmamap_t    map;
1742         struct ixgbe_tx_buf *txbuf;
1743         union ixgbe_adv_tx_desc *txd = NULL;
1744
1745         m_head = *m_headp;
1746
1747         /* Basic descriptor defines */
1748         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1749             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1750
1751         if (m_head->m_flags & M_VLANTAG)
1752                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1753
1754         /*
1755          * Important to capture the first descriptor
1756          * used because it will contain the index of
1757          * the one we tell the hardware to report back
1758          */
1759         first = txr->next_avail_desc;
1760         txbuf = &txr->tx_buffers[first];
1761         map = txbuf->map;
1762
1763         /*
1764          * Map the packet for DMA.
1765          */
1766         maxsegs = txr->tx_avail - IXGBE_TX_RESERVED;
1767         if (maxsegs > adapter->num_segs)
1768                 maxsegs = adapter->num_segs;
1769
1770         error = bus_dmamap_load_mbuf_defrag(txr->txtag, map, m_headp,
1771             segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1772         if (error) {
1773                 if (error == ENOBUFS)
1774                         adapter->mbuf_defrag_failed++;
1775                 else
1776                         adapter->no_tx_dma_setup++;
1777
1778                 m_freem(*m_headp);
1779                 *m_headp = NULL;
1780                 return (error);
1781         }
1782
1783         /* Make certain there are enough descriptors */
1784         if (nsegs > txr->tx_avail - 2) {
1785                 txr->no_desc_avail++;
1786                 error = ENOBUFS;
1787                 goto xmit_fail;
1788         }
1789         m_head = *m_headp;
1790
1791         /*
1792         ** Set up the appropriate offload context
1793         ** this becomes the first descriptor of 
1794         ** a packet.
1795         */
1796         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1797                 if (ixgbe_tso_setup(txr, m_head, &paylen, &olinfo_status)) {
1798                         cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1799                         olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1800                         olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1801                         ++adapter->tso_tx;
1802                 } else
1803                         return (ENXIO);
1804         } else if (ixgbe_tx_ctx_setup(txr, m_head))
1805                 olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1806
1807 #ifdef IXGBE_IEEE1588
1808         /* This is changing soon to an mtag detection */
1809         if (we detect this mbuf has a TSTAMP mtag)
1810                 cmd_type_len |= IXGBE_ADVTXD_MAC_TSTAMP;
1811 #endif
1812
1813 #ifdef IXGBE_FDIR
1814         /* Do the flow director magic */
1815         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1816                 ++txr->atr_count;
1817                 if (txr->atr_count >= atr_sample_rate) {
1818                         ixgbe_atr(txr, m_head);
1819                         txr->atr_count = 0;
1820                 }
1821         }
1822 #endif
1823         /* Record payload length */
1824         if (paylen == 0)
1825                 olinfo_status |= m_head->m_pkthdr.len <<
1826                     IXGBE_ADVTXD_PAYLEN_SHIFT;
1827
1828         i = txr->next_avail_desc;
1829         for (j = 0; j < nsegs; j++) {
1830                 bus_size_t seglen;
1831                 bus_addr_t segaddr;
1832
1833                 txbuf = &txr->tx_buffers[i];
1834                 txd = &txr->tx_base[i];
1835                 seglen = segs[j].ds_len;
1836                 segaddr = htole64(segs[j].ds_addr);
1837
1838                 txd->read.buffer_addr = segaddr;
1839                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1840                     cmd_type_len |seglen);
1841                 txd->read.olinfo_status = htole32(olinfo_status);
1842                 last = i; /* descriptor that will get completion IRQ */
1843
1844                 if (++i == adapter->num_tx_desc)
1845                         i = 0;
1846
1847                 txbuf->m_head = NULL;
1848                 txbuf->eop_index = -1;
1849         }
1850
1851         txd->read.cmd_type_len |=
1852             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1853         txr->tx_avail -= nsegs;
1854         txr->next_avail_desc = i;
1855
1856         txbuf->m_head = m_head;
1857         /* Swap the dma map between the first and last descriptor */
1858         txr->tx_buffers[first].map = txbuf->map;
1859         txbuf->map = map;
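             /*
              * The loaded map belonged to the first buffer slot, but the
              * mbuf is stored on the last slot; swapping the maps keeps
              * the loaded map with the mbuf so both are released together
              * at completion time.
              */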
1860         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1861
1862         /* Set the index of the descriptor that will be marked done */
1863         txbuf = &txr->tx_buffers[first];
1864         txbuf->eop_index = last;
1865
1866         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1867             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1868         /*
1869          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1870          * hardware that this frame is available to transmit.
1871          */
1872         ++txr->total_packets;
1873         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1874
1875         return (0);
1876
1877 xmit_fail:
1878         bus_dmamap_unload(txr->txtag, txbuf->map);
1879         return (error);
1880
1881 }
1882
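     /*
     ** Program the unicast/multicast promiscuous bits in FCTRL from
     ** the interface flags; both bits are cleared first so that
     ** turning promiscuous mode off takes effect immediately.
     */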
1883 static void
1884 ixgbe_set_promisc(struct adapter *adapter)
1885 {
1886         u_int32_t       reg_rctl;
1887         struct ifnet   *ifp = adapter->ifp;
1888
1889         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1890         reg_rctl &= (~IXGBE_FCTRL_UPE);
1891         reg_rctl &= (~IXGBE_FCTRL_MPE);
1892         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1893
1894         if (ifp->if_flags & IFF_PROMISC) {
1895                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1896                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1897         } else if (ifp->if_flags & IFF_ALLMULTI) {
1898                 reg_rctl |= IXGBE_FCTRL_MPE;
1899                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1900                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1901         }
1902         return;
1903 }
1904
1905
1906 /*********************************************************************
1907  *  Multicast Update
1908  *
1909  *  This routine is called whenever the multicast address list is updated.
1910  *
1911  **********************************************************************/
1912 #define IXGBE_RAR_ENTRIES 16
1913
1914 static void
1915 ixgbe_set_multi(struct adapter *adapter)
1916 {
1917         u32     fctrl;
1918         u8      *mta;
1919         u8      *update_ptr;
1920         struct  ifmultiaddr *ifma;
1921         int     mcnt = 0;
1922         struct ifnet   *ifp = adapter->ifp;
1923
1924         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1925
1926         mta = adapter->mta;
1927         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1928             MAX_NUM_MULTICAST_ADDRESSES);
1929
1930         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1931         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1932         if (ifp->if_flags & IFF_PROMISC)
1933                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1934         else if (ifp->if_flags & IFF_ALLMULTI) {
1935                 fctrl |= IXGBE_FCTRL_MPE;
1936                 fctrl &= ~IXGBE_FCTRL_UPE;
1937         } else
1938                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1939         
1940         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1941
1942         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1943                 if (ifma->ifma_addr->sa_family != AF_LINK)
1944                         continue;
1945                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1946                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1947                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1948                 mcnt++;
1949         }
1950
1951         update_ptr = mta;
1952         ixgbe_update_mc_addr_list(&adapter->hw,
1953             update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1954
1955         return;
1956 }
1957
1958 /*
1959  * This is an iterator function needed by the multicast shared
1960  * code. It feeds the shared code routine the addresses from the
1961  * array built by ixgbe_set_multi() one at a time.
1962  */
1963 static u8 *
1964 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
1965 {
1966         u8 *addr = *update_ptr;
1967         u8 *newptr;
1968         *vmdq = 0;
1969
1970         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1971         *update_ptr = newptr;
1972         return addr;
1973 }
1974
1975
1976 /*********************************************************************
1977  *  Timer routine
1978  *
1979  *  This routine checks for link status, updates statistics,
1980  *  and runs the watchdog check.
1981  *
1982  **********************************************************************/
1983
1984 static void
1985 ixgbe_local_timer(void *arg)
1986 {
1987         struct adapter  *adapter = arg;
1988         device_t        dev = adapter->dev;
1989         struct ifnet    *ifp = adapter->ifp;
1990         struct ix_queue *que = adapter->queues;
1991         struct tx_ring  *txr = adapter->tx_rings;
1992         int             hung, busy, paused;
1993
1994         IXGBE_CORE_LOCK(adapter);
1995         hung = busy = paused = 0;
1996
1997         /* Check for pluggable optics */
1998         if (adapter->sfp_probe)
1999                 if (!ixgbe_sfp_probe(adapter))
2000                         goto out; /* Nothing to do */
2001
2002         ixgbe_update_link_status(adapter);
2003         ixgbe_update_stats_counters(adapter);
2004
2005         /*
2006          * If the interface has been paused
2007          * then don't do the watchdog check
2008          */
2009         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2010                 paused = 1;
2011
2012         /*
2013         ** Check the TX queues status
2014         **      - central locked handling of OACTIVE
2015         **      - watchdog only if all queues show hung
2016         */          
2017         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2018                 if ((txr->queue_status & IXGBE_QUEUE_HUNG) &&
2019                     (paused == 0))
2020                         ++hung;
2021                 if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
2022                         ++busy;
2023                 if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
2024                         taskqueue_enqueue(que->tq, &que->que_task);
2025         }
2026         /* Only truly watchdog if all queues show hung */
2027         if (hung == adapter->num_queues)
2028                 goto watchdog;
2029         /* Only turn off the stack flow when ALL are depleted */
2030         if (busy == adapter->num_queues)
2031                 ifp->if_flags |= IFF_OACTIVE;
2032         else if ((ifp->if_flags & IFF_OACTIVE) &&
2033             (busy < adapter->num_queues))
2034                 ifp->if_flags &= ~IFF_OACTIVE;
2035
2036 out:
2037         ixgbe_rearm_queues(adapter, adapter->que_mask);
2038         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2039         IXGBE_CORE_UNLOCK(adapter);
2040         return;
2041
2042 watchdog:
2043         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2044         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2045             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2046             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2047         device_printf(dev,"TX(%d) desc avail = %d,"
2048             "Next TX to Clean = %d\n",
2049             txr->me, txr->tx_avail, txr->next_to_clean);
2050         adapter->ifp->if_flags &= ~IFF_RUNNING;
2051         adapter->watchdog_events++;
2052         ixgbe_init_locked(adapter);
2053
2054         IXGBE_CORE_UNLOCK(adapter);
2055 }
2056
2057 /*
2058 ** Note: this routine updates the OS on the link state;
2059 **      the real check of the hardware only happens with
2060 **      a link interrupt.
2061 */
2062 static void
2063 ixgbe_update_link_status(struct adapter *adapter)
2064 {
2065         struct ifnet    *ifp = adapter->ifp;
2066         struct tx_ring *txr = adapter->tx_rings;
2067         device_t dev = adapter->dev;
2068
2069
2070         if (adapter->link_up){ 
2071                 if (adapter->link_active == FALSE) {
2072                         if (bootverbose)
2073                                 device_printf(dev,"Link is up %d Gbps %s\n",
2074                                     ((adapter->link_speed == 128)? 10:1),
2075                                     "Full Duplex");
2076                         adapter->link_active = TRUE;
2077                         /* Update any Flow Control changes */
2078                         ixgbe_fc_enable(&adapter->hw);
2079                         ifp->if_link_state = LINK_STATE_UP;
2080                         if_link_state_change(ifp);
2081                 }
2082         } else { /* Link down */
2083                 if (adapter->link_active == TRUE) {
2084                         if (bootverbose)
2085                                 device_printf(dev,"Link is Down\n");
2086                         ifp->if_link_state = LINK_STATE_DOWN;
2087                         if_link_state_change(ifp);
2088                         adapter->link_active = FALSE;
2089                         for (int i = 0; i < adapter->num_queues;
2090                             i++, txr++)
2091                                 txr->queue_status = IXGBE_QUEUE_IDLE;
2092                 }
2093         }
2094
2095         return;
2096 }
2097
2098
2099 /*********************************************************************
2100  *
2101  *  This routine disables all traffic on the adapter by issuing a
2102  *  global reset on the MAC and deallocates TX/RX buffers.
2103  *
2104  **********************************************************************/
2105
2106 static void
2107 ixgbe_stop(void *arg)
2108 {
2109         struct ifnet   *ifp;
2110         struct adapter *adapter = arg;
2111         struct ixgbe_hw *hw = &adapter->hw;
2112         ifp = adapter->ifp;
2113
2114         KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
2115
2116         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2117         ixgbe_disable_intr(adapter);
2118         callout_stop(&adapter->timer);
2119
2120         /* Let the stack know...*/
2121         ifp->if_flags &= ~IFF_RUNNING;
2122         ifp->if_flags |= IFF_OACTIVE;
2123
2124         ixgbe_reset_hw(hw);
2125         hw->adapter_stopped = FALSE;
2126         ixgbe_stop_adapter(hw);
2127         /* Turn off the laser */
2128         if (hw->phy.multispeed_fiber)
2129                 ixgbe_disable_tx_laser(hw);
2130
2131         /* reprogram the RAR[0] in case user changed it. */
2132         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2133
2134         return;
2135 }
2136
2137
2138 /*********************************************************************
2139  *
2140  *  Determine hardware revision.
2141  *
2142  **********************************************************************/
2143 static void
2144 ixgbe_identify_hardware(struct adapter *adapter)
2145 {
2146         device_t        dev = adapter->dev;
2147         struct ixgbe_hw *hw = &adapter->hw;
2148
2149         /* Save off the information about this board */
2150         hw->vendor_id = pci_get_vendor(dev);
2151         hw->device_id = pci_get_device(dev);
2152         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2153         hw->subsystem_vendor_id =
2154             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2155         hw->subsystem_device_id =
2156             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2157
2158         /* We need this here to set the num_segs below */
2159         ixgbe_set_mac_type(hw);
2160
2161         /* Pick up the 82599 and VF settings */
2162         if (hw->mac.type != ixgbe_mac_82598EB) {
2163                 hw->phy.smart_speed = ixgbe_smart_speed;
2164                 adapter->num_segs = IXGBE_82599_SCATTER;
2165         } else
2166                 adapter->num_segs = IXGBE_82598_SCATTER;
2167
2168         return;
2169 }
2170
2171 /*********************************************************************
2172  *
2173  *  Determine optic type
2174  *
2175  **********************************************************************/
2176 static void
2177 ixgbe_setup_optics(struct adapter *adapter)
2178 {
2179         struct ixgbe_hw *hw = &adapter->hw;
2180         int             layer;
2181         
2182         layer = ixgbe_get_supported_physical_layer(hw);
2183
2184         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2185                 adapter->optics = IFM_10G_T;
2186                 return;
2187         }
2188
2189         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2190                 adapter->optics = IFM_1000_T;
2191                 return;
2192         }
2193
2194         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2195             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2196                 adapter->optics = IFM_10G_LR;
2197                 return;
2198         }
2199
2200         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2201                 adapter->optics = IFM_10G_SR;
2202                 return;
2203         }
2204
2205         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2206                 adapter->optics = IFM_10G_TWINAX;
2207                 return;
2208         }
2209
2210         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2211             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2212                 adapter->optics = IFM_10G_CX4;
2213                 return;
2214         }
2215
2216         /* If we get here just set the default */
2217         adapter->optics = IFM_ETHER | IFM_AUTO;
2218         return;
2219 }
2220
2221 /*********************************************************************
2222  *
2223  *  Setup the Legacy or MSI Interrupt handler
2224  *
2225  **********************************************************************/
2226 static int
2227 ixgbe_allocate_legacy(struct adapter *adapter)
2228 {
2229         device_t dev = adapter->dev;
2230         struct          ix_queue *que = adapter->queues;
2231         int error, rid = 0;
2232         unsigned int intr_flags;
2233
2234         /* MSI RID at 1 */
2235         if (adapter->msix == 1)
2236                 rid = 1;
2237
2238         /* Try allocating a MSI interrupt first */
2239         adapter->intr_type = pci_alloc_1intr(dev, ixgbe_msi_enable,
2240                 &rid, &intr_flags);
2241
2242         /* We allocate a single interrupt resource */
2243         adapter->res = bus_alloc_resource_any(dev,
2244             SYS_RES_IRQ, &rid, intr_flags);
2245         if (adapter->res == NULL) {
2246                 device_printf(dev, "Unable to allocate bus resource: "
2247                     "interrupt\n");
2248                 return (ENXIO);
2249         }
2250
2251         /*
2252          * Try allocating a fast interrupt and the associated deferred
2253          * processing contexts.
2254          */
2255         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2256         que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2257             taskqueue_thread_enqueue, &que->tq);
2258         taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s ixq",
2259             device_get_nameunit(adapter->dev));
2260
2261         /* Tasklets for Link, SFP and Multispeed Fiber */
2262         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2263         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2264         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2265 #ifdef IXGBE_FDIR
2266         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2267 #endif
2268         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2269             taskqueue_thread_enqueue, &adapter->tq);
2270         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2271             device_get_nameunit(adapter->dev));
2272
2273         if ((error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
2274             ixgbe_legacy_irq, que, &adapter->tag, &adapter->serializer)) != 0) {
2275                 device_printf(dev, "Failed to register fast interrupt "
2276                     "handler: %d\n", error);
2277                 taskqueue_free(que->tq);
2278                 taskqueue_free(adapter->tq);
2279                 que->tq = NULL;
2280                 adapter->tq = NULL;
2281                 return (error);
2282         }
2283         /* For simplicity in the handlers */
2284         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2285
2286         return (0);
2287 }
2288
2289
2290 /*********************************************************************
2291  *
2292  *  Setup MSIX Interrupt resources and handlers 
2293  *
2294  **********************************************************************/
2295 static int
2296 ixgbe_allocate_msix(struct adapter *adapter)
2297 {
2298         device_t        dev = adapter->dev;
2299         struct          ix_queue *que = adapter->queues;
2300         int             error, rid, vector = 0;
2301
2302         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2303                 rid = vector + 1;
2304                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2305                     RF_SHAREABLE | RF_ACTIVE);
2306                 if (que->res == NULL) {
2307                         device_printf(dev,"Unable to allocate"
2308                             " bus resource: que interrupt [%d]\n", vector);
2309                         return (ENXIO);
2310                 }
2311                 /* Set the handler function */
2312                 error = bus_setup_intr(dev, que->res, INTR_MPSAFE,
2313                     ixgbe_msix_que, que, &que->tag, &que->serializer);
2314                 if (error) {
2315                         que->res = NULL;
2316                         device_printf(dev, "Failed to register QUE handler");
2317                         return (error);
2318                 }
2319 #if 0 /* __FreeBSD_version >= 800504 */
2320                 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2321 #endif
2322                 que->msix = vector;
2323                 adapter->que_mask |= ((u64)1 << que->msix);
2324                 /*
2325                 ** Bind the msix vector, and thus the
2326                 ** ring to the corresponding cpu.
2327                 */
2328 #if 0 /* XXX */
2329                 if (adapter->num_queues > 1)
2330                         bus_bind_intr(dev, que->res, i);
2331 #endif
2332
2333                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2334                 que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2335                     taskqueue_thread_enqueue, &que->tq);
2336                 taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s que",
2337                     device_get_nameunit(adapter->dev));
2338         }
2339
2340         /* and Link */
2341         rid = vector + 1;
2342         adapter->res = bus_alloc_resource_any(dev,
2343             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2344         if (!adapter->res) {
2345                 device_printf(dev,"Unable to allocate"
2346             " bus resource: Link interrupt [%d]\n", rid);
2347                 return (ENXIO);
2348         }
2349         /* Set the link handler function */
2350         error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
2351             ixgbe_msix_link, adapter, &adapter->tag, &adapter->serializer);
2352         if (error) {
2353                 adapter->res = NULL;
2354                 device_printf(dev, "Failed to register LINK handler");
2355                 return (error);
2356         }
2357 #if 0 /* __FreeBSD_version >= 800504 */
2358         bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2359 #endif
2360         adapter->linkvec = vector;
2361         /* Tasklets for Link, SFP and Multispeed Fiber */
2362         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2363         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2364         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2365 #ifdef IXGBE_FDIR
2366         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2367 #endif
2368         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2369             taskqueue_thread_enqueue, &adapter->tq);
2370         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2371             device_get_nameunit(adapter->dev));
2372
2373         return (0);
2374 }
2375
2376 #if 0   /* HAVE_MSIX */
2377 /*
2378  * Setup Either MSI/X or MSI
2379  */
2380 static int
2381 ixgbe_setup_msix(struct adapter *adapter)
2382 {
2383         device_t dev = adapter->dev;
2384         int rid, want, queues, msgs;
2385
2386         /* Override by tuneable */
2387         if (ixgbe_enable_msix == 0)
2388                 goto msi;
2389
2390         /* First try MSI/X */
2391         rid = PCIR_BAR(MSIX_82598_BAR);
2392         adapter->msix_mem = bus_alloc_resource_any(dev,
2393             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2394         if (!adapter->msix_mem) {
2395                 rid += 4;       /* 82599 maps in higher BAR */
2396                 adapter->msix_mem = bus_alloc_resource_any(dev,
2397                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2398         }
2399         if (!adapter->msix_mem) {
2400                 /* May not be enabled */
2401                 device_printf(adapter->dev,
2402                     "Unable to map MSIX table \n");
2403                 goto msi;
2404         }
2405
2406         msgs = pci_msix_count(dev); 
2407         if (msgs == 0) { /* system has msix disabled */
2408                 bus_release_resource(dev, SYS_RES_MEMORY,
2409                     rid, adapter->msix_mem);
2410                 adapter->msix_mem = NULL;
2411                 goto msi;
2412         }
2413
2414         /* Figure out a reasonable auto config value */
2415         queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2416
2417         if (ixgbe_num_queues != 0)
2418                 queues = ixgbe_num_queues;
2419         /* Set max queues to 8 when autoconfiguring */
2420         else if ((ixgbe_num_queues == 0) && (queues > 8))
2421                 queues = 8;
2422
2423         /*
2424         ** Want one vector (RX/TX pair) per queue
2425         ** plus an additional for Link.
2426         */
2427         want = queues + 1;
2428         if (msgs >= want)
2429                 msgs = want;
2430         else {
2431                 device_printf(adapter->dev,
2432                     "MSIX Configuration Problem, "
2433                     "%d vectors but %d queues wanted!\n",
2434                     msgs, want);
2435                 return (0); /* Will go to Legacy setup */
2436         }
2437         if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2438                 device_printf(adapter->dev,
2439                     "Using MSIX interrupts with %d vectors\n", msgs);
2440                 adapter->num_queues = queues;
2441                 return (msgs);
2442         }
2443 msi:
2444         msgs = pci_msi_count(dev);
2445         if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2446                 device_printf(adapter->dev,"Using an MSI interrupt\n");
2447         else
2448                 device_printf(adapter->dev,"Using a Legacy interrupt\n");
2449         return (msgs);
2450 }
2451 #endif
2452
2453
2454 static int
2455 ixgbe_allocate_pci_resources(struct adapter *adapter)
2456 {
2457         int             rid;
2458         device_t        dev = adapter->dev;
2459
2460         rid = PCIR_BAR(0);
2461         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2462             &rid, RF_ACTIVE);
2463
2464         if (!(adapter->pci_mem)) {
2465                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2466                 return (ENXIO);
2467         }
2468
2469         adapter->osdep.mem_bus_space_tag =
2470                 rman_get_bustag(adapter->pci_mem);
2471         adapter->osdep.mem_bus_space_handle =
2472                 rman_get_bushandle(adapter->pci_mem);
2473         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2474
2475         /* Legacy defaults */
2476         adapter->num_queues = 1;
2477         adapter->hw.back = &adapter->osdep;
2478
2479         /*
2480         ** Now set up MSI or MSI/X; this should
2481         ** return the number of supported
2482         ** vectors. (Will be 1 for MSI)
2483         */
2484 #if 0   /* HAVE_MSIX */
2485         adapter->msix = ixgbe_setup_msix(adapter);
2486 #endif
2487         return (0);
2488 }
2489
2490 static void
2491 ixgbe_free_pci_resources(struct adapter * adapter)
2492 {
2493         struct          ix_queue *que = adapter->queues;
2494         device_t        dev = adapter->dev;
2495         int             rid, memrid;
2496
2497         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2498                 memrid = PCIR_BAR(MSIX_82598_BAR);
2499         else
2500                 memrid = PCIR_BAR(MSIX_82599_BAR);
2501
2502         /*
2503         ** There is a slight possibility of a failure mode
2504         ** in attach that will result in entering this function
2505         ** before interrupt resources have been initialized, and
2506         ** in that case we do not want to execute the loops below.
2507         ** We can detect this reliably by the state of the adapter
2508         ** res pointer.
2509         */
2510         if (adapter->res == NULL)
2511                 goto mem;
2512
2513         /*
2514         **  Release all msix queue resources:
2515         */
2516         for (int i = 0; i < adapter->num_queues; i++, que++) {
2517                 rid = que->msix + 1;
2518                 if (que->tag != NULL) {
2519                         bus_teardown_intr(dev, que->res, que->tag);
2520                         que->tag = NULL;
2521                 }
2522                 if (que->res != NULL)
2523                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2524         }
2525
2526
2527         /* Clean the Legacy or Link interrupt last */
2528         if (adapter->linkvec) /* we are doing MSIX */
2529                 rid = adapter->linkvec + 1;
2530         else
2531                 rid = (adapter->msix != 0) ? 1 : 0;
2532
2533         if (adapter->tag != NULL) {
2534                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2535                 adapter->tag = NULL;
2536         }
2537         if (adapter->res != NULL)
2538                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2539         if (adapter->intr_type == PCI_INTR_TYPE_MSI)
2540                 pci_release_msi(adapter->dev);
2541
2542 mem:
2543         if (adapter->msix)
2544                 pci_release_msi(dev);
2545
2546         if (adapter->msix_mem != NULL)
2547                 bus_release_resource(dev, SYS_RES_MEMORY,
2548                     memrid, adapter->msix_mem);
2549
2550         if (adapter->pci_mem != NULL)
2551                 bus_release_resource(dev, SYS_RES_MEMORY,
2552                     PCIR_BAR(0), adapter->pci_mem);
2553
2554         return;
2555 }
2556
2557 /*********************************************************************
2558  *
2559  *  Setup networking device structure and register an interface.
2560  *
2561  **********************************************************************/
2562 static int
2563 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2564 {
2565         struct ixgbe_hw *hw = &adapter->hw;
2566         struct ifnet   *ifp;
2567
2568         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2569
2570         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2571         if (ifp == NULL) {
2572                 device_printf(dev, "can not allocate ifnet structure\n");
2573                 return (-1);
2574         }
2575         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2576         ifp->if_baudrate = 1000000000;
2577         ifp->if_init = ixgbe_init;
2578         ifp->if_softc = adapter;
2579         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2580         ifp->if_ioctl = ixgbe_ioctl;
2581         ifp->if_start = ixgbe_start;
2582 #if 0 /* __FreeBSD_version >= 800000 */
2583         ifp->if_transmit = ixgbe_mq_start;
2584         ifp->if_qflush = ixgbe_qflush;
2585 #endif
2586         ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2;
2587
2588         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2589
2590         adapter->max_frame_size =
2591             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2592
2593         /*
2594          * Tell the upper layer(s) we support long frames.
2595          */
2596         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2597
2598         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2599         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2600         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2601 #if 0 /* NET_TSO */
2602                              |  IFCAP_VLAN_HWTSO
2603 #endif
2604                              |  IFCAP_VLAN_MTU;
2605         ifp->if_capenable = ifp->if_capabilities;
2606
2607         /* Don't enable LRO by default */
2608 #if 0 /* NET_LRO */
2609         ifp->if_capabilities |= IFCAP_LRO;
2610 #endif
2611
2612         /*
2613         ** Don't turn this on by default: if vlans are
2614         ** created on another pseudo device (e.g. lagg),
2615         ** vlan events are not passed through and operation
2616         ** breaks, but with HW FILTER off it works. If
2617         ** using vlans directly on the ixgbe driver you can
2618         ** enable this and get full hardware tag filtering.
2619         */
2620         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2621
2622         /*
2623          * Specify the media types supported by this adapter and register
2624          * callbacks to update media and link information
2625          */
2626         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2627                      ixgbe_media_status);
2628         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2629         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2630         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2631                 ifmedia_add(&adapter->media,
2632                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2633                 ifmedia_add(&adapter->media,
2634                     IFM_ETHER | IFM_1000_T, 0, NULL);
2635         }
2636         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2637         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2638
2639         return (0);
2640 }
2641
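     /*
     ** Establish the initial link configuration: for SFP modules the
     ** mod/msf tasklets do the work, otherwise query the advertised
     ** (or supported) speeds and hand them to the shared-code
     ** setup_link routine.
     */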
2642 static void
2643 ixgbe_config_link(struct adapter *adapter)
2644 {
2645         struct ixgbe_hw *hw = &adapter->hw;
2646         u32     autoneg, err = 0;
2647         bool    sfp, negotiate;
2648
2649         sfp = ixgbe_is_sfp(hw);
2650
2651         if (sfp) { 
2652                 if (hw->phy.multispeed_fiber) {
2653                         hw->mac.ops.setup_sfp(hw);
2654                         ixgbe_enable_tx_laser(hw);
2655                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2656                 } else
2657                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2658         } else {
2659                 if (hw->mac.ops.check_link)
2660                         err = ixgbe_check_link(hw, &autoneg,
2661                             &adapter->link_up, FALSE);
2662                 if (err)
2663                         goto out;
2664                 autoneg = hw->phy.autoneg_advertised;
2665                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2666                         err  = hw->mac.ops.get_link_capabilities(hw,
2667                             &autoneg, &negotiate);
2668                 if (err)
2669                         goto out;
2670                 if (hw->mac.ops.setup_link)
2671                         err = hw->mac.ops.setup_link(hw, autoneg,
2672                             negotiate, adapter->link_up);
2673         }
2674 out:
2675         return;
2676 }
2677
2678 /********************************************************************
2679  * Manage DMA'able memory.
2680  *******************************************************************/
2681 static void
2682 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2683 {
2684         if (error)
2685                 return;
2686         *(bus_addr_t *) arg = segs->ds_addr;
2687         return;
2688 }
2689
2690 static int
2691 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2692                 struct ixgbe_dma_alloc *dma, int mapflags)
2693 {
2694         device_t dev = adapter->dev;
2695         int             r;
2696
2697         r = bus_dma_tag_create(NULL,    /* parent */
2698                                DBA_ALIGN, 0,    /* alignment, bounds */
2699                                BUS_SPACE_MAXADDR,       /* lowaddr */
2700                                BUS_SPACE_MAXADDR,       /* highaddr */
2701                                NULL, NULL,      /* filter, filterarg */
2702                                size,    /* maxsize */
2703                                1,       /* nsegments */
2704                                size,    /* maxsegsize */
2705                                BUS_DMA_ALLOCNOW,        /* flags */
2706                                &dma->dma_tag);
2707         if (r != 0) {
2708                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2709                        "error %u\n", r);
2710                 goto fail_0;
2711         }
2712         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2713                              BUS_DMA_NOWAIT, &dma->dma_map);
2714         if (r != 0) {
2715                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2716                        "error %u\n", r);
2717                 goto fail_1;
2718         }
2719         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2720                             size,
2721                             ixgbe_dmamap_cb,
2722                             &dma->dma_paddr,
2723                             mapflags | BUS_DMA_NOWAIT);
2724         if (r != 0) {
2725                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2726                        "error %u\n", r);
2727                 goto fail_2;
2728         }
2729         dma->dma_size = size;
2730         return (0);
2731 fail_2:
2732         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2733 fail_1:
2734         bus_dma_tag_destroy(dma->dma_tag);
2735 fail_0:
2736         dma->dma_map = NULL;
2737         dma->dma_tag = NULL;
2738         return (r);
2739 }
2740
2741 static void
2742 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2743 {
2744         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2745             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2746         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2747         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2748         bus_dma_tag_destroy(dma->dma_tag);
2749 }
2750
2751
2752 /*********************************************************************
2753  *
2754  *  Allocate memory for the transmit and receive rings, and then
2755  *  the descriptors associated with each, called only once at attach.
2756  *
2757  **********************************************************************/
2758 static int
2759 ixgbe_allocate_queues(struct adapter *adapter)
2760 {
2761         device_t        dev = adapter->dev;
2762         struct ix_queue *que;
2763         struct tx_ring  *txr;
2764         struct rx_ring  *rxr;
2765         int rsize, tsize, error = IXGBE_SUCCESS;
2766         int txconf = 0, rxconf = 0;
2767
2768         /* First allocate the top level queue structs */
2769         if (!(adapter->queues =
2770             (struct ix_queue *) kmalloc(sizeof(struct ix_queue) *
2771             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2772                 device_printf(dev, "Unable to allocate queue memory\n");
2773                 error = ENOMEM;
2774                 goto fail;
2775         }
2776
2777         /* First allocate the TX ring struct memory */
2778         if (!(adapter->tx_rings =
2779             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2780             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2781                 device_printf(dev, "Unable to allocate TX ring memory\n");
2782                 error = ENOMEM;
2783                 goto tx_fail;
2784         }
2785
2786         /* Next allocate the RX */
2787         if (!(adapter->rx_rings =
2788             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2789             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2790                 device_printf(dev, "Unable to allocate RX ring memory\n");
2791                 error = ENOMEM;
2792                 goto rx_fail;
2793         }
2794
2795         /* For the ring itself */
2796         tsize = roundup2(adapter->num_tx_desc *
2797             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2798
2799         /*
2800          * Now set up the TX queues; txconf is needed to handle the
2801          * possibility that things fail midcourse and we need to
2802          * undo the allocations gracefully
2803          */ 
2804         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2805                 /* Set up some basics */
2806                 txr = &adapter->tx_rings[i];
2807                 txr->adapter = adapter;
2808                 txr->me = i;
2809
2810                 /* Initialize the TX side lock */
2811                 ksnprintf(txr->lock_name, sizeof(txr->lock_name), "%s:tx(%d)",
2812                     device_get_nameunit(dev), txr->me);
2813                 lockinit(&txr->tx_lock, txr->lock_name, 0, LK_CANRECURSE);
2814
2815                 if (ixgbe_dma_malloc(adapter, tsize,
2816                         &txr->txdma, BUS_DMA_NOWAIT)) {
2817                         device_printf(dev,
2818                             "Unable to allocate TX Descriptor memory\n");
2819                         error = ENOMEM;
2820                         goto err_tx_desc;
2821                 }
2822                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2823                 bzero((void *)txr->tx_base, tsize);
2824
2825                 /* Now allocate transmit buffers for the ring */
2826                 if (ixgbe_allocate_transmit_buffers(txr)) {
2827                         device_printf(dev,
2828                             "Critical Failure setting up transmit buffers\n");
2829                         error = ENOMEM;
2830                         goto err_tx_desc;
2831                 }
2832 #if 0 /* __FreeBSD_version >= 800000 */
2833                 /* Allocate a buf ring */
2834                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2835                     M_WAITOK, &txr->tx_mtx);
2836                 if (txr->br == NULL) {
2837                         device_printf(dev,
2838                             "Critical Failure setting up buf ring\n");
2839                         error = ENOMEM;
2840                         goto err_tx_desc;
2841                 }
2842 #endif
2843         }
2844
2845         /*
2846          * Next the RX queues...
2847          */ 
2848         rsize = roundup2(adapter->num_rx_desc *
2849             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2850         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2851                 rxr = &adapter->rx_rings[i];
2852                 /* Set up some basics */
2853                 rxr->adapter = adapter;
2854                 rxr->me = i;
2855
2856                 /* Initialize the RX side lock */
2857                 ksnprintf(rxr->lock_name, sizeof(rxr->lock_name), "%s:rx(%d)",
2858                     device_get_nameunit(dev), rxr->me);
2859                 lockinit(&rxr->rx_lock, rxr->lock_name, 0, LK_CANRECURSE);
2860
2861                 if (ixgbe_dma_malloc(adapter, rsize,
2862                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2863                         device_printf(dev,
2864                             "Unable to allocate RX Descriptor memory\n");
2865                         error = ENOMEM;
2866                         goto err_rx_desc;
2867                 }
2868                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2869                 bzero((void *)rxr->rx_base, rsize);
2870
2871                 /* Allocate receive buffers for the ring */
2872                 if (ixgbe_allocate_receive_buffers(rxr)) {
2873                         device_printf(dev,
2874                             "Critical Failure setting up receive buffers\n");
2875                         error = ENOMEM;
2876                         goto err_rx_desc;
2877                 }
2878         }
2879
2880         /*
2881         ** Finally set up the queue holding structs
2882         */
2883         for (int i = 0; i < adapter->num_queues; i++) {
2884                 que = &adapter->queues[i];
2885                 que->adapter = adapter;
2886                 que->txr = &adapter->tx_rings[i];
2887                 que->rxr = &adapter->rx_rings[i];
2888         }
2889
2890         return (0);
2891
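     /*
      * Error unwinding: free only what was set up, in reverse order,
      * using rxconf/txconf as counts of rings that completed setup.
      */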
2892 err_rx_desc:
2893         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2894                 ixgbe_dma_free(adapter, &rxr->rxdma);
2895 err_tx_desc:
2896         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2897                 ixgbe_dma_free(adapter, &txr->txdma);
2898         kfree(adapter->rx_rings, M_DEVBUF);
2899 rx_fail:
2900         kfree(adapter->tx_rings, M_DEVBUF);
2901 tx_fail:
2902         kfree(adapter->queues, M_DEVBUF);
2903 fail:
2904         return (error);
2905 }
2906
2907 /*********************************************************************
2908  *
2909  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2910  *  the information needed to transmit a packet on the wire. This is
2911  *  called only once at attach, setup is done every reset.
2912  *
2913  **********************************************************************/
2914 static int
2915 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2916 {
2917         struct adapter *adapter = txr->adapter;
2918         device_t dev = adapter->dev;
2919         struct ixgbe_tx_buf *txbuf;
2920         int error, i;
2921
2922         /*
2923          * Set up the DMA tag used to map transmit mbufs (up to TSO size).
2924          */
2925         if ((error = bus_dma_tag_create(
2926                                NULL,    /* parent */
2927                                1, 0,            /* alignment, bounds */
2928                                BUS_SPACE_MAXADDR,       /* lowaddr */
2929                                BUS_SPACE_MAXADDR,       /* highaddr */
2930                                NULL, NULL,              /* filter, filterarg */
2931                                IXGBE_TSO_SIZE,          /* maxsize */
2932                                adapter->num_segs,       /* nsegments */
2933                                PAGE_SIZE,               /* maxsegsize */
2934                                0,                       /* flags */
2935                                &txr->txtag))) {
2936                 device_printf(dev,"Unable to allocate TX DMA tag\n");
2937                 goto fail;
2938         }
2939
2940         if (!(txr->tx_buffers =
2941             (struct ixgbe_tx_buf *) kmalloc(sizeof(struct ixgbe_tx_buf) *
2942             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2943                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2944                 error = ENOMEM;
2945                 goto fail;
2946         }
2947
2948         /* Create the descriptor buffer dma maps */
2949         txbuf = txr->tx_buffers;
2950         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2951                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2952                 if (error != 0) {
2953                         device_printf(dev, "Unable to create TX DMA map\n");
2954                         goto fail;
2955                 }
2956         }
2957
2958         return 0;
2959 fail:
2960         /* Free everything; this handles the case where we failed partway through */
2961         ixgbe_free_transmit_structures(adapter);
2962         return (error);
2963 }
2964
2965 /*********************************************************************
2966  *
2967  *  Initialize a transmit ring.
2968  *
2969  **********************************************************************/
2970 static void
2971 ixgbe_setup_transmit_ring(struct tx_ring *txr)
2972 {
2973         struct adapter *adapter = txr->adapter;
2974         struct ixgbe_tx_buf *txbuf;
2975         int i;
2976 #ifdef DEV_NETMAP
2977         struct netmap_adapter *na = NA(adapter->ifp);
2978         struct netmap_slot *slot;
2979 #endif /* DEV_NETMAP */
2980
2981         /* Clear the old ring contents */
2982         IXGBE_TX_LOCK(txr);
2983 #ifdef DEV_NETMAP
2984         /*
2985          * (under lock): if in netmap mode, do some consistency
2986          * checks and set slot to entry 0 of the netmap ring.
2987          */
2988         slot = netmap_reset(na, NR_TX, txr->me, 0);
2989 #endif /* DEV_NETMAP */
2990         bzero((void *)txr->tx_base,
2991               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
2992         /* Reset indices */
2993         txr->next_avail_desc = 0;
2994         txr->next_to_clean = 0;
2995
2996         /* Free any existing tx buffers. */
2997         txbuf = txr->tx_buffers;
2998         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2999                 if (txbuf->m_head != NULL) {
3000                         bus_dmamap_sync(txr->txtag, txbuf->map,
3001                             BUS_DMASYNC_POSTWRITE);
3002                         bus_dmamap_unload(txr->txtag, txbuf->map);
3003                         m_freem(txbuf->m_head);
3004                         txbuf->m_head = NULL;
3005                 }
3006 #ifdef DEV_NETMAP
3007                 /*
3008                  * In netmap mode, set the map for the packet buffer.
3009                  * NOTE: Some drivers (not this one) also need to set
3010                  * the physical buffer address in the NIC ring.
3011                  * Slots in the netmap ring (indexed by "si") are
3012                  * kring->nkr_hwofs positions "ahead" wrt the
3013                  * corresponding slot in the NIC ring. In some drivers
3014                  * (not here) nkr_hwofs can be negative. Function
3015                  * netmap_idx_n2k() handles wraparounds properly.
3016                  */
3017                 if (slot) {
3018                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3019                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3020                 }
3021 #endif /* DEV_NETMAP */
3022                 /* Clear the EOP index */
3023                 txbuf->eop_index = -1;
3024         }
3025
3026 #ifdef IXGBE_FDIR
3027         /* Set the rate at which we sample packets */
3028         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3029                 txr->atr_sample = atr_sample_rate;
3030 #endif
3031
3032         /* Set number of descriptors available */
3033         txr->tx_avail = adapter->num_tx_desc;
3034
3035         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3036             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3037         IXGBE_TX_UNLOCK(txr);
3038 }
3039
3040 /*********************************************************************
3041  *
3042  *  Initialize all transmit rings.
3043  *
3044  **********************************************************************/
3045 static int
3046 ixgbe_setup_transmit_structures(struct adapter *adapter)
3047 {
3048         struct tx_ring *txr = adapter->tx_rings;
3049
3050         for (int i = 0; i < adapter->num_queues; i++, txr++)
3051                 ixgbe_setup_transmit_ring(txr);
3052
3053         return (0);
3054 }
3055
3056 /*********************************************************************
3057  *
3058  *  Enable transmit unit.
3059  *
3060  **********************************************************************/
3061 static void
3062 ixgbe_initialize_transmit_units(struct adapter *adapter)
3063 {
3064         struct tx_ring  *txr = adapter->tx_rings;
3065         struct ixgbe_hw *hw = &adapter->hw;
3066
3067         /* Setup the Base and Length of the Tx Descriptor Ring */
3068
3069         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3070                 u64     tdba = txr->txdma.dma_paddr;
3071                 u32     txctrl;
3072
3073                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3074                        (tdba & 0x00000000ffffffffULL));
3075                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
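                     /*
                      * Program the ring length in bytes; legacy and advanced
                      * descriptors are the same size (16 bytes), so sizing
                      * with the legacy struct here is equivalent.
                      */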
3076                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3077                     adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc));
3078
3079                 /* Setup the HW Tx Head and Tail descriptor pointers */
3080                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3081                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3082
3083                 /* Setup Transmit Descriptor Cmd Settings */
3084                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3085                 txr->queue_status = IXGBE_QUEUE_IDLE;
3086
3087                 /* Disable Head Writeback */
3088                 switch (hw->mac.type) {
3089                 case ixgbe_mac_82598EB:
3090                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3091                         break;
3092                 case ixgbe_mac_82599EB:
3093                 case ixgbe_mac_X540:
3094                 default:
3095                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3096                         break;
3097                 }
3098                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3099                 switch (hw->mac.type) {
3100                 case ixgbe_mac_82598EB:
3101                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3102                         break;
3103                 case ixgbe_mac_82599EB:
3104                 case ixgbe_mac_X540:
3105                 default:
3106                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3107                         break;
3108                 }
3109
3110         }
3111
3112         if (hw->mac.type != ixgbe_mac_82598EB) {
3113                 u32 dmatxctl, rttdcs;
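                     /* Globally enable transmit DMA (DMATXCTL.TE) on 82599/X540 */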
3114                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3115                 dmatxctl |= IXGBE_DMATXCTL_TE;
3116                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3117                 /* Disable arbiter to set MTQC */
3118                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3119                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3120                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3121                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3122                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3123                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3124         }
3125
3126         return;
3127 }
3128
3129 /*********************************************************************
3130  *
3131  *  Free all transmit rings.
3132  *
3133  **********************************************************************/
3134 static void
3135 ixgbe_free_transmit_structures(struct adapter *adapter)
3136 {
3137         struct tx_ring *txr = adapter->tx_rings;
3138
3139         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3140                 IXGBE_TX_LOCK(txr);
3141                 ixgbe_free_transmit_buffers(txr);
3142                 ixgbe_dma_free(adapter, &txr->txdma);
3143                 IXGBE_TX_UNLOCK(txr);
3144                 IXGBE_TX_LOCK_DESTROY(txr);
3145         }
3146         kfree(adapter->tx_rings, M_DEVBUF);
3147 }
3148
3149 /*********************************************************************
3150  *
3151  *  Free transmit ring related data structures.
3152  *
3153  **********************************************************************/
3154 static void
3155 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3156 {
3157         struct adapter *adapter = txr->adapter;
3158         struct ixgbe_tx_buf *tx_buffer;
3159         int             i;
3160
3161         INIT_DEBUGOUT("ixgbe_free_transmit_buffers: begin");
3162
3163         if (txr->tx_buffers == NULL)
3164                 return;
3165
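             /*
              * Unload and destroy each per-descriptor DMA map, freeing any
              * mbuf that is still attached.
              */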
3166         tx_buffer = txr->tx_buffers;
3167         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3168                 if (tx_buffer->m_head != NULL) {
3169                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3170                             BUS_DMASYNC_POSTWRITE);
3171                         bus_dmamap_unload(txr->txtag,
3172                             tx_buffer->map);
3173                         m_freem(tx_buffer->m_head);
3174                         tx_buffer->m_head = NULL;
3175                         if (tx_buffer->map != NULL) {
3176                                 bus_dmamap_destroy(txr->txtag,
3177                                     tx_buffer->map);
3178                                 tx_buffer->map = NULL;
3179                         }
3180                 } else if (tx_buffer->map != NULL) {
3181                         bus_dmamap_unload(txr->txtag,
3182                             tx_buffer->map);
3183                         bus_dmamap_destroy(txr->txtag,
3184                             tx_buffer->map);
3185                         tx_buffer->map = NULL;
3186                 }
3187         }
3188 #if 0 /* __FreeBSD_version >= 800000 */
3189         if (txr->br != NULL)
3190                 buf_ring_free(txr->br, M_DEVBUF);
3191 #endif
3192         if (txr->tx_buffers != NULL) {
3193                 kfree(txr->tx_buffers, M_DEVBUF);
3194                 txr->tx_buffers = NULL;
3195         }
3196         if (txr->txtag != NULL) {
3197                 bus_dma_tag_destroy(txr->txtag);
3198                 txr->txtag = NULL;
3199         }
3200         return;
3201 }
3202
3203 /*********************************************************************
3204  *
3205  *  Advanced Context Descriptor setup for VLAN or CSUM
3206  *
3207  **********************************************************************/
3208
3209 static bool
3210 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3211 {
3212         struct adapter *adapter = txr->adapter;
3213         struct ixgbe_adv_tx_context_desc *TXD;
3214         struct ixgbe_tx_buf        *tx_buffer;
3215         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3216         struct ether_vlan_header *eh;
3217         struct ip *ip;
3218         struct ip6_hdr *ip6;
3219         int  ehdrlen, ip_hlen = 0;
3220         u16     etype;
3221         u8      ipproto = 0;
3222         bool    offload = TRUE;
3223         int ctxd = txr->next_avail_desc;
3224         u16 vtag = 0;
3225
3226
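             /*
              * If no checksum offload was requested we may still need a
              * context descriptor to carry a VLAN tag; if neither applies,
              * no context descriptor is used and we return below.
              */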
3227         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3228                 offload = FALSE;
3229
3230         tx_buffer = &txr->tx_buffers[ctxd];
3231         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3232
3233         /*
3234         ** In advanced descriptors the vlan tag must 
3235         ** be placed into the descriptor itself.
3236         */
3237         if (mp->m_flags & M_VLANTAG) {
3238                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3239                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3240         } else if (offload == FALSE)
3241                 return FALSE;
3242
3243         /*
3244          * Determine where frame payload starts.
3245          * Jump over vlan headers if already present,
3246          * helpful for QinQ too.
3247          */
3248         eh = mtod(mp, struct ether_vlan_header *);
3249         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3250                 etype = ntohs(eh->evl_proto);
3251                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3252         } else {
3253                 etype = ntohs(eh->evl_encap_proto);
3254                 ehdrlen = ETHER_HDR_LEN;
3255         }
3256
3257         /* Set the ether header length */
3258         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3259
3260         switch (etype) {
3261                 case ETHERTYPE_IP:
3262                         ip = (struct ip *)(mp->m_data + ehdrlen);
3263                         ip_hlen = ip->ip_hl << 2;
3264                         ipproto = ip->ip_p;
3265                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3266                         break;
3267                 case ETHERTYPE_IPV6:
3268                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3269                         ip_hlen = sizeof(struct ip6_hdr);
3270                         /* XXX-BZ this will go badly in case of ext hdrs. */
3271                         ipproto = ip6->ip6_nxt;
3272                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3273                         break;
3274                 default:
3275                         offload = FALSE;
3276                         break;
3277         }
3278
3279         vlan_macip_lens |= ip_hlen;
3280         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3281
3282         switch (ipproto) {
3283                 case IPPROTO_TCP:
3284                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3285                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3286                         break;
3287
3288                 case IPPROTO_UDP:
3289                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3290                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3291                         break;
3292
3293 #if 0
3294                 case IPPROTO_SCTP:
3295                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3296                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3297                         break;
3298 #endif
3299                 default:
3300                         offload = FALSE;
3301                         break;
3302         }
3303
3304         /* Now copy bits into descriptor */
3305         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3306         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3307         TXD->seqnum_seed = htole32(0);
3308         TXD->mss_l4len_idx = htole32(0);
3309
3310         tx_buffer->m_head = NULL;
3311         tx_buffer->eop_index = -1;
3312
3313         /* We've consumed the first desc, adjust counters */
3314         if (++ctxd == adapter->num_tx_desc)
3315                 ctxd = 0;
3316         txr->next_avail_desc = ctxd;
3317         --txr->tx_avail;
3318
3319         return (offload);
3320 }
3321
3322 /**********************************************************************
3323  *
3324  *  Setup work for hardware segmentation offload (TSO) on
3325  *  adapters using advanced tx descriptors
3326  *
3327  **********************************************************************/
3328 static bool
3329 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen,
3330     u32 *olinfo_status)
3331 {
3332         struct adapter *adapter = txr->adapter;
3333         struct ixgbe_adv_tx_context_desc *TXD;
3334         struct ixgbe_tx_buf        *tx_buffer;
3335         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3336         u16 vtag = 0, eh_type;
3337         u32 mss_l4len_idx = 0, len;
3338         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3339         struct ether_vlan_header *eh;
3340 #if 0 /* IPv6 TSO */
3341 #ifdef INET6
3342         struct ip6_hdr *ip6;
3343 #endif
3344 #endif
3345 #ifdef INET
3346         struct ip *ip;
3347 #endif
3348         struct tcphdr *th;
3349
3350
3351         /*
3352          * Determine where frame payload starts.
3353          * Jump over vlan headers if already present
3354          */
3355         eh = mtod(mp, struct ether_vlan_header *);
3356         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3357                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3358                 eh_type = eh->evl_proto;
3359         } else {
3360                 ehdrlen = ETHER_HDR_LEN;
3361                 eh_type = eh->evl_encap_proto;
3362         }
3363
3364         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3365         len = ehdrlen + sizeof(struct tcphdr);
3366         switch (ntohs(eh_type)) {
3367 #if 0 /* IPv6 TSO */
3368 #ifdef INET6
3369         case ETHERTYPE_IPV6:
3370                 if (mp->m_len < len + sizeof(struct ip6_hdr))
3371                         return FALSE;
3372                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3373                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3374                 if (ip6->ip6_nxt != IPPROTO_TCP)
3375                         return FALSE;
3376                 ip_hlen = sizeof(struct ip6_hdr);
3377                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3378                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3379                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3380                 break;
3381 #endif
3382 #endif
3383 #ifdef INET
3384         case ETHERTYPE_IP:
3385                 if (mp->m_len < len + sizeof(struct ip))
3386                         return FALSE;
3387                 ip = (struct ip *)(mp->m_data + ehdrlen);
3388                 if (ip->ip_p != IPPROTO_TCP)
3389                         return FALSE;
3390                 ip->ip_sum = 0;
3391                 ip_hlen = ip->ip_hl << 2;
3392                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3393                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3394                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3395                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3396                 /* Tell transmit desc to also do IPv4 checksum. */
3397                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3398                 break;
3399 #endif
3400         default:
3401                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3402                     __func__, ntohs(eh_type));
3403                 break;
3404         }
3405
3406         ctxd = txr->next_avail_desc;
3407         tx_buffer = &txr->tx_buffers[ctxd];
3408         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3409
3410         tcp_hlen = th->th_off << 2;
3411
3412         /* This is used in the transmit desc in encap */
3413         *paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3414
3415         /* VLAN MACLEN IPLEN */
3416         if (mp->m_flags & M_VLANTAG) {
3417                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3418                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3419         }
3420
3421         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3422         vlan_macip_lens |= ip_hlen;
3423         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3424
3425         /* ADV DTYPE TUCMD */
3426         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3427         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3428         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3429
3430         /* MSS L4LEN IDX */
3431         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3432         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3433         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3434
3435         TXD->seqnum_seed = htole32(0);
3436         tx_buffer->m_head = NULL;
3437         tx_buffer->eop_index = -1;
3438
3439         if (++ctxd == adapter->num_tx_desc)
3440                 ctxd = 0;
3441
3442         txr->tx_avail--;
3443         txr->next_avail_desc = ctxd;
3444         return TRUE;
3445 }
3446
3447 #ifdef IXGBE_FDIR
3448 /*
3449 ** This routine parses packet headers so that Flow
3450 ** Director can make a hashed filter table entry 
3451 ** allowing traffic flows to be identified and kept
3452 ** on the same cpu.  Parsing every packet would be a
3453 ** performance hit, so we only sample one out of every
3454 ** IXGBE_FDIR_RATE packets.
3455 */
3456 static void
3457 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3458 {
3459         struct adapter                  *adapter = txr->adapter;
3460         struct ix_queue                 *que;
3461         struct ip                       *ip;
3462         struct tcphdr                   *th;
3463         struct udphdr                   *uh;
3464         struct ether_vlan_header        *eh;
3465         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3466         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3467         int                             ehdrlen, ip_hlen;
3468         u16                             etype;
3469
3470         eh = mtod(mp, struct ether_vlan_header *);
3471         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3472                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3473                 etype = eh->evl_proto;
3474         } else {
3475                 ehdrlen = ETHER_HDR_LEN;
3476                 etype = eh->evl_encap_proto;
3477         }
3478
3479         /* Only handling IPv4 */
3480         if (etype != htons(ETHERTYPE_IP))
3481                 return;
3482
3483         ip = (struct ip *)(mp->m_data + ehdrlen);
3484         ip_hlen = ip->ip_hl << 2;
3485
3486         /* check if we're UDP or TCP */
3487         switch (ip->ip_p) {
3488         case IPPROTO_TCP:
3489                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3490                 /* src and dst are inverted */
3491                 common.port.dst ^= th->th_sport;
3492                 common.port.src ^= th->th_dport;
3493                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3494                 break;
3495         case IPPROTO_UDP:
3496                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3497                 /* src and dst are inverted */
3498                 common.port.dst ^= uh->uh_sport;
3499                 common.port.src ^= uh->uh_dport;
3500                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3501                 break;
3502         default:
3503                 return;
3504         }
3505
3506         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vlantag);
3507         if (mp->m_pkthdr.ether_vlantag)
3508                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3509         else
3510                 common.flex_bytes ^= etype;
3511         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3512
3513         que = &adapter->queues[txr->me];
3514         /*
3515         ** This assumes the Rx queue and Tx
3516         ** queue are bound to the same CPU
3517         */
3518         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3519             input, common, que->msix);
3520 }
3521 #endif /* IXGBE_FDIR */
3522
3523 /**********************************************************************
3524  *
3525  *  Examine each tx_buffer in the used queue. If the hardware is done
3526  *  processing the packet then free associated resources. The
3527  *  tx_buffer is put back on the free queue.
3528  *
3529  **********************************************************************/
3530 static bool
3531 ixgbe_txeof(struct tx_ring *txr)
3532 {
3533         struct adapter  *adapter = txr->adapter;
3534         struct ifnet    *ifp = adapter->ifp;
3535         u32     first, last, done, processed;
3536         struct ixgbe_tx_buf *tx_buffer;
3537         struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
3538
3539         KKASSERT(lockstatus(&txr->tx_lock, curthread) != 0);
3540
3541 #ifdef DEV_NETMAP
3542         if (ifp->if_capenable & IFCAP_NETMAP) {
3543                 struct netmap_adapter *na = NA(ifp);
3544                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3545
3546                 tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
3547
3548                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3549                     BUS_DMASYNC_POSTREAD);
3550                 /*
3551                  * In netmap mode, all the work is done in the context
3552                  * of the client thread. Interrupt handlers only wake up
3553                  * clients, which may be sleeping on individual rings
3554                  * or on a global resource for all rings.
3555                  * To implement tx interrupt mitigation, we wake up the client
3556                  * thread roughly every half ring, even if the NIC interrupts
3557                  * more frequently. This is implemented as follows:
3558                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3559                  *   the slot that should wake up the thread (nkr_num_slots
3560                  *   means the user thread should not be woken up);
3561                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3562                  *   or the slot has the DD bit set.
3563                  *
3564                  * When the driver has separate locks, we need to
3565                  * release and re-acquire txlock to avoid deadlocks.
3566                  * XXX see if we can find a better way.
3567                  */
3568                 if (!netmap_mitigate ||
3569                     (kring->nr_kflags < kring->nkr_num_slots &&
3570                      tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
3571                         kring->nr_kflags = kring->nkr_num_slots;
3572                         selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3573                         IXGBE_TX_UNLOCK(txr);
3574                         IXGBE_CORE_LOCK(adapter);
3575                         selwakeuppri(&na->tx_si, PI_NET);
3576                         IXGBE_CORE_UNLOCK(adapter);
3577                         IXGBE_TX_LOCK(txr);
3578                 }
3579                 return FALSE;
3580         }
3581 #endif /* DEV_NETMAP */
3582
3583         if (txr->tx_avail == adapter->num_tx_desc) {
3584                 txr->queue_status = IXGBE_QUEUE_IDLE;
3585                 return FALSE;
3586         }
3587
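             /*
              * Clean from next_to_clean one packet at a time; a packet is
              * complete once the DD bit is set in its EOP descriptor.
              */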
3588         processed = 0;
3589         first = txr->next_to_clean;
3590         tx_buffer = &txr->tx_buffers[first];
3591         /* For cleanup we just use legacy struct */
3592         tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3593         last = tx_buffer->eop_index;
3594         if (last == -1)
3595                 return FALSE;
3596         eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3597
3598         /*
3599         ** Get the index of the first descriptor
3600         ** BEYOND the EOP and call that 'done'.
3601         ** I do this so the comparison in the
3602         ** inner while loop below can be simple
3603         */
3604         if (++last == adapter->num_tx_desc) last = 0;
3605         done = last;
3606
3607         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3608             BUS_DMASYNC_POSTREAD);
3609         /*
3610         ** Only the EOP descriptor of a packet now has the DD
3611         ** bit set, this is what we look for...
3612         */
3613         while (eop_desc->upper.fields.status & IXGBE_TXD_STAT_DD) {
3614                 /* We clean the range of the packet */
3615                 while (first != done) {
3616                         tx_desc->upper.data = 0;
3617                         tx_desc->lower.data = 0;
3618                         tx_desc->buffer_addr = 0;
3619                         ++txr->tx_avail;
3620                         ++processed;
3621
3622                         if (tx_buffer->m_head) {
3623                                 txr->bytes +=
3624                                     tx_buffer->m_head->m_pkthdr.len;
3625                                 bus_dmamap_sync(txr->txtag,
3626                                     tx_buffer->map,
3627                                     BUS_DMASYNC_POSTWRITE);
3628                                 bus_dmamap_unload(txr->txtag,
3629                                     tx_buffer->map);
3630                                 m_freem(tx_buffer->m_head);
3631                                 tx_buffer->m_head = NULL;
3632                                 tx_buffer->map = NULL;
3633                         }
3634                         tx_buffer->eop_index = -1;
3635                         txr->watchdog_time = ticks;
3636
3637                         if (++first == adapter->num_tx_desc)
3638                                 first = 0;
3639
3640                         tx_buffer = &txr->tx_buffers[first];
3641                         tx_desc =
3642                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3643                 }
3644                 ++txr->packets;
3645                 ++ifp->if_opackets;
3646                 /* See if there is more work now */
3647                 last = tx_buffer->eop_index;
3648                 if (last != -1) {
3649                         eop_desc =
3650                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3651                         /* Get next done point */
3652                         if (++last == adapter->num_tx_desc) last = 0;
3653                         done = last;
3654                 } else
3655                         break;
3656         }
3657         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3658             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3659
3660         txr->next_to_clean = first;
3661
3662         /*
3663         ** Watchdog calculation, we know there's
3664         ** work outstanding or the first return
3665         ** would have been taken, so none processed
3666         ** for too long indicates a hang.
3667         */
3668         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3669                 txr->queue_status = IXGBE_QUEUE_HUNG;
3670
3671         /* Once a minimum number of descriptors are free, clear the depleted bit */
3672         if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
3673                 txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;
3674
3675         if (txr->tx_avail == adapter->num_tx_desc) {
3676                 txr->queue_status = IXGBE_QUEUE_IDLE;
3677                 return (FALSE);
3678         }
3679
3680         return TRUE;
3681 }
3682
3683 /*********************************************************************
3684  *
3685  *  Refresh mbuf buffers for RX descriptor rings
3686  *   - now keeps its own state, so discards due to resource
3687  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3688  *     it just returns, keeping its placeholder, and can simply
3689  *     be called again later to retry.
3690  *
3691  **********************************************************************/
3692 static void
3693 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3694 {
3695         struct adapter          *adapter = rxr->adapter;
3696         bus_dma_segment_t       hseg[1];
3697         bus_dma_segment_t       pseg[1];
3698         struct ixgbe_rx_buf     *rxbuf;
3699         struct mbuf             *mh, *mp;
3700         int                     i, j, nsegs, error;
3701         bool                    refreshed = FALSE;
3702
3703         i = j = rxr->next_to_refresh;
3704         /* Control the loop with one beyond */
3705         if (++j == adapter->num_rx_desc)
3706                 j = 0;
3707
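             /*
              * Walk from next_to_refresh toward 'limit', replacing any
              * missing header/payload mbufs and reloading the descriptor
              * DMA addresses.
              */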
3708         while (j != limit) {
3709                 rxbuf = &rxr->rx_buffers[i];
3710                 if (rxr->hdr_split == FALSE)
3711                         goto no_split;
3712
3713                 if (rxbuf->m_head == NULL) {
3714                         mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3715                         if (mh == NULL)
3716                                 goto update;
3717                 } else
3718                         mh = rxbuf->m_head;
3719
3720                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3722                 mh->m_flags |= M_PKTHDR;
3723                 /* Get the memory mapping */
3724                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
3725                     rxbuf->hmap, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3726                 if (error != 0) {
3727                         kprintf("Refresh mbufs: hdr dmamap load"
3728                             " failure - %d\n", error);
3729                         m_free(mh);
3730                         rxbuf->m_head = NULL;
3731                         goto update;
3732                 }
3733                 rxbuf->m_head = mh;
3734                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3735                     BUS_DMASYNC_PREREAD);
3736                 rxr->rx_base[i].read.hdr_addr =
3737                     htole64(hseg[0].ds_addr);
3738
3739 no_split:
3740                 if (rxbuf->m_pack == NULL) {
3741                         mp = m_getjcl(MB_DONTWAIT, MT_DATA,
3742                             M_PKTHDR, adapter->rx_mbuf_sz);
3743                         if (mp == NULL)
3744                                 goto update;
3745                 } else
3746                         mp = rxbuf->m_pack;
3747
3748                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3749                 /* Get the memory mapping */
3750                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
3751                     rxbuf->pmap, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3752                 if (error != 0) {
3753                         kprintf("Refresh mbufs: payload dmamap load"
3754                             " failure - %d\n", error);
3755                         m_free(mp);
3756                         rxbuf->m_pack = NULL;
3757                         goto update;
3758                 }
3759                 rxbuf->m_pack = mp;
3760                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3761                     BUS_DMASYNC_PREREAD);
3762                 rxr->rx_base[i].read.pkt_addr =
3763                     htole64(pseg[0].ds_addr);
3764
3765                 refreshed = TRUE;
3766                 /* Next is precalculated */
3767                 i = j;
3768                 rxr->next_to_refresh = i;
3769                 if (++j == adapter->num_rx_desc)
3770                         j = 0;
3771         }
3772 update:
3773         if (refreshed) /* Update hardware tail index */
3774                 IXGBE_WRITE_REG(&adapter->hw,
3775                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3776         return;
3777 }
3778
3779 /*********************************************************************
3780  *
3781  *  Allocate memory for rx_buffer structures. Since we use one
3782  *  rx_buffer per received packet, the maximum number of rx_buffer's
3783  *  that we'll need is equal to the number of receive descriptors
3784  *  that we've allocated.
3785  *
3786  **********************************************************************/
3787 static int
3788 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3789 {
3790         struct  adapter         *adapter = rxr->adapter;
3791         device_t                dev = adapter->dev;
3792         struct ixgbe_rx_buf     *rxbuf;
3793         int                     i, bsize, error;
3794
3795         bsize = sizeof(struct ixgbe_rx_buf) * adapter->num_rx_desc;
3796         if (!(rxr->rx_buffers =
3797             (struct ixgbe_rx_buf *) kmalloc(bsize,
3798             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3799                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3800                 error = ENOMEM;
3801                 goto fail;
3802         }
3803
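             /* DMA tag for the small header mbufs used with header split */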
3804         if ((error = bus_dma_tag_create(NULL,   /* parent */
3805                                    1, 0,        /* alignment, bounds */
3806                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3807                                    BUS_SPACE_MAXADDR,   /* highaddr */
3808                                    NULL, NULL,          /* filter, filterarg */
3809                                    MSIZE,               /* maxsize */
3810                                    1,                   /* nsegments */
3811                                    MSIZE,               /* maxsegsize */
3812                                    0,                   /* flags */
3813                                    &rxr->htag))) {
3814                 device_printf(dev, "Unable to create RX header DMA tag\n");
3815                 goto fail;
3816         }
3817
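             /* DMA tag for the packet payload buffers (clusters up to 16KB) */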
3818         if ((error = bus_dma_tag_create(NULL,   /* parent */
3819                                    1, 0,        /* alignment, bounds */
3820                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3821                                    BUS_SPACE_MAXADDR,   /* highaddr */
3822                                    NULL, NULL,          /* filter, filterarg */
3823                                    MJUM16BYTES,         /* maxsize */
3824                                    1,                   /* nsegments */
3825                                    MJUM16BYTES,         /* maxsegsize */
3826                                    0,                   /* flags */
3827                                    &rxr->ptag))) {
3828                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3829                 goto fail;
3830         }
3831
3832         for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3833                 rxbuf = &rxr->rx_buffers[i];
3834                 error = bus_dmamap_create(rxr->htag,
3835                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3836                 if (error) {
3837                         device_printf(dev, "Unable to create RX head map\n");
3838                         goto fail;
3839                 }
3840                 error = bus_dmamap_create(rxr->ptag,
3841                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3842                 if (error) {
3843                         device_printf(dev, "Unable to create RX pkt map\n");
3844                         goto fail;
3845                 }
3846         }
3847
3848         return (0);
3849
3850 fail:
3851         /* Frees all, but can handle partial completion */
3852         ixgbe_free_receive_structures(adapter);
3853         return (error);
3854 }
3855
3856 /*
3857 ** Used to detect a descriptor that has
3858 ** been merged by Hardware RSC.
3859 */
3860 static inline u32
3861 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3862 {
3863         return (le32toh(rx->wb.lower.lo_dword.data) &
3864             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3865 }
3866
3867 /*********************************************************************
3868  *
3869  *  Initialize Hardware RSC (LRO) feature on 82599
3870  *  for an RX ring, this is toggled by the LRO capability
3871  *  even though it is transparent to the stack.
3872  *
3873  **********************************************************************/
3874 #if 0   /* NET_LRO */
3875 static void
3876 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3877 {
3878         struct  adapter         *adapter = rxr->adapter;
3879         struct  ixgbe_hw        *hw = &adapter->hw;
3880         u32                     rscctrl, rdrxctl;
3881
3882         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3883         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3884 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3885         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3886 #endif /* DEV_NETMAP */
3887         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3888         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3889         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3890
3891         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3892         rscctrl |= IXGBE_RSCCTL_RSCEN;
3893         /*
3894         ** Limit the total number of descriptors that
3895         ** can be combined, so it does not exceed 64K
3896         */
3897         if (adapter->rx_mbuf_sz == MCLBYTES)
3898                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3899         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
3900                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3901         else if (adapter->rx_mbuf_sz == MJUM9BYTES)
3902                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3903         else  /* Using 16K cluster */
3904                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3905
3906         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3907
3908         /* Enable TCP header recognition */
3909         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3910             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3911             IXGBE_PSRTYPE_TCPHDR));
3912
3913         /* Disable RSC for ACK packets */
3914         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3915             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3916
3917         rxr->hw_rsc = TRUE;
3918 }
3919 #endif
3920
3921 static void     
3922 ixgbe_free_receive_ring(struct rx_ring *rxr)
3923 {
3924         struct  adapter         *adapter;
3925         struct ixgbe_rx_buf       *rxbuf;
3926         int i;
3927
3928         adapter = rxr->adapter;
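             /*
              * Unload the DMA maps and free both the header and payload
              * mbufs for every descriptor slot.
              */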
3929         for (i = 0; i < adapter->num_rx_desc; i++) {
3930                 rxbuf = &rxr->rx_buffers[i];
3931                 if (rxbuf->m_head != NULL) {
3932                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3933                             BUS_DMASYNC_POSTREAD);
3934                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3935                         rxbuf->m_head->m_flags |= M_PKTHDR;
3936                         m_freem(rxbuf->m_head);
3937                 }
3938                 if (rxbuf->m_pack != NULL) {
3939                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3940                             BUS_DMASYNC_POSTREAD);
3941                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3942                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3943                         m_freem(rxbuf->m_pack);
3944                 }
3945                 rxbuf->m_head = NULL;
3946                 rxbuf->m_pack = NULL;
3947         }
3948 }
3949
3950
3951 /*********************************************************************
3952  *
3953  *  Initialize a receive ring and its buffers.
3954  *
3955  **********************************************************************/
3956 static int
3957 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3958 {
3959         struct  adapter         *adapter;
3960         struct ifnet            *ifp;
3961         device_t                dev;
3962         struct ixgbe_rx_buf     *rxbuf;
3963         bus_dma_segment_t       pseg[1], hseg[1];
3964 #if 0   /* NET_LRO */
3965         struct lro_ctrl         *lro = &rxr->lro;
3966 #endif
3967         int                     rsize, nsegs, error = 0;
3968 #ifdef DEV_NETMAP
3969         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3970         struct netmap_slot *slot;
3971 #endif /* DEV_NETMAP */
3972
3973         adapter = rxr->adapter;
3974         ifp = adapter->ifp;
3975         dev = adapter->dev;
3976
3977         /* Clear the ring contents */
3978         IXGBE_RX_LOCK(rxr);
3979 #ifdef DEV_NETMAP
3980         /* same as in ixgbe_setup_transmit_ring() */
3981         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3982 #endif /* DEV_NETMAP */
3983         rsize = roundup2(adapter->num_rx_desc *
3984             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3985         bzero((void *)rxr->rx_base, rsize);
3986
3987         /* Free current RX buffer structs and their mbufs */
3988         ixgbe_free_receive_ring(rxr);
3989
3990         /* Configure header split? */
3991         if (ixgbe_header_split)
3992                 rxr->hdr_split = TRUE;
3993
3994         /* Now replenish the mbufs */
3995         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3996                 struct mbuf     *mh, *mp;
3997
3998                 rxbuf = &rxr->rx_buffers[j];
3999 #ifdef DEV_NETMAP
4000                 /*
4001                  * In netmap mode, fill the map and set the buffer
4002                  * address in the NIC ring, considering the offset
4003                  * between the netmap and NIC rings (see comment in
4004                  * ixgbe_setup_transmit_ring() ). No need to allocate
4005                  * an mbuf, so end the block with a continue;
4006                  */