c433cac9af98655aedafe217fc304242db052db1
[dragonfly.git] / sys / dev / netif / ixgbe / ixgbe.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2012, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD: src/sys/dev/ixgbe/ixgbe.c,v 1.70 2012/07/05 20:51:44 jfv Exp $*/
34
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #include "ixgbe.h"
39
40 /*********************************************************************
41  *  Set this to one to display debug statistics
42  *********************************************************************/
43 int             ixgbe_display_debug_stats = 0;
44
45 /*********************************************************************
46  *  Driver version
47  *********************************************************************/
48 char ixgbe_driver_version[] = "2.4.8";
49
50 /*********************************************************************
51  *  PCI Device ID Table
52  *
53  *  Used by probe to select devices to load on
54  *  Last field stores an index into ixgbe_strings
55  *  Last entry must be all 0s
56  *
57  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
58  *********************************************************************/
59
static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
	/*
	 * A subvendor/subdevice of 0 acts as a wildcard in the match
	 * loop of ixgbe_probe(); the trailing index field selects the
	 * branding string from ixgbe_strings[] below.
	 */
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings
 *********************************************************************/

static char    *ixgbe_strings[] = {
	"Intel(R) PRO/10GbE PCI-Express Network Driver"
};
97
98 /*********************************************************************
99  *  Function prototypes
100  *********************************************************************/
101 static int      ixgbe_probe(device_t);
102 static int      ixgbe_attach(device_t);
103 static int      ixgbe_detach(device_t);
104 static int      ixgbe_shutdown(device_t);
105 static void     ixgbe_start(struct ifnet *);
106 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
107 #if 0 /* __FreeBSD_version >= 800000 */
108 static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
109 static int      ixgbe_mq_start_locked(struct ifnet *,
110                     struct tx_ring *, struct mbuf *);
111 static void     ixgbe_qflush(struct ifnet *);
112 #endif
113 static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
114 static void     ixgbe_init(void *);
115 static void     ixgbe_init_locked(struct adapter *);
116 static void     ixgbe_stop(void *);
117 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
118 static int      ixgbe_media_change(struct ifnet *);
119 static void     ixgbe_identify_hardware(struct adapter *);
120 static int      ixgbe_allocate_pci_resources(struct adapter *);
121 static int      ixgbe_allocate_msix(struct adapter *);
122 static int      ixgbe_allocate_legacy(struct adapter *);
123 static int      ixgbe_allocate_queues(struct adapter *);
124 static int      ixgbe_setup_msix(struct adapter *);
125 static void     ixgbe_free_pci_resources(struct adapter *);
126 static void     ixgbe_local_timer(void *);
127 static int      ixgbe_setup_interface(device_t, struct adapter *);
128 static void     ixgbe_config_link(struct adapter *);
129
130 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
131 static int      ixgbe_setup_transmit_structures(struct adapter *);
132 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
133 static void     ixgbe_initialize_transmit_units(struct adapter *);
134 static void     ixgbe_free_transmit_structures(struct adapter *);
135 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
136
137 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
138 static int      ixgbe_setup_receive_structures(struct adapter *);
139 static int      ixgbe_setup_receive_ring(struct rx_ring *);
140 static void     ixgbe_initialize_receive_units(struct adapter *);
141 static void     ixgbe_free_receive_structures(struct adapter *);
142 static void     ixgbe_free_receive_buffers(struct rx_ring *);
143 #if 0   /* NET_LRO */
144 static void     ixgbe_setup_hw_rsc(struct rx_ring *);
145 #endif
146
147 static void     ixgbe_enable_intr(struct adapter *);
148 static void     ixgbe_disable_intr(struct adapter *);
149 static void     ixgbe_update_stats_counters(struct adapter *);
150 static bool     ixgbe_txeof(struct tx_ring *);
151 static bool     ixgbe_rxeof(struct ix_queue *, int);
152 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
153 static void     ixgbe_set_promisc(struct adapter *);
154 static void     ixgbe_set_multi(struct adapter *);
155 static void     ixgbe_update_link_status(struct adapter *);
156 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
157 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
158 static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
159 static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
160 static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
161 static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
162                     struct ixgbe_dma_alloc *, int);
163 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
164 static void     ixgbe_add_rx_process_limit(struct adapter *, const char *,
165                     const char *, int *, int);
166 static bool     ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *);
167 static bool     ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
168 static int      ixgbe_tso_pullup(struct tx_ring *, struct mbuf **);
169 static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
170 static void     ixgbe_configure_ivars(struct adapter *);
171 static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
172
173 static void     ixgbe_setup_vlan_hw_support(struct adapter *);
174 static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
175 static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);
176
177 static void     ixgbe_add_hw_stats(struct adapter *adapter);
178
179 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
180 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
181                     struct mbuf *, u32);
182
183 /* Support for pluggable optic modules */
184 static bool     ixgbe_sfp_probe(struct adapter *);
185 static void     ixgbe_setup_optics(struct adapter *);
186
187 /* Legacy (single vector) interrupt handler */
188 static void     ixgbe_legacy_irq(void *);
189
190 /* The MSI/X Interrupt handlers */
191 static void     ixgbe_msix_que(void *);
192 static void     ixgbe_msix_link(void *);
193
194 /* Deferred interrupt tasklets */
195 static void     ixgbe_handle_que(void *, int);
196 static void     ixgbe_handle_link(void *, int);
197 static void     ixgbe_handle_msf(void *, int);
198 static void     ixgbe_handle_mod(void *, int);
199
200 #ifdef IXGBE_FDIR
201 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
202 static void     ixgbe_reinit_fdir(void *, int);
203 #endif
204
205 /*********************************************************************
206  *  FreeBSD Device Interface Entry Points
207  *********************************************************************/
208
/* newbus device-interface dispatch table for the "ix" driver */
static device_method_t ixgbe_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, ixgbe_probe),
	DEVMETHOD(device_attach, ixgbe_attach),
	DEVMETHOD(device_detach, ixgbe_detach),
	DEVMETHOD(device_shutdown, ixgbe_shutdown),
	{0, 0}
};

/* Driver glue: per-device softc is our struct adapter */
static driver_t ixgbe_driver = {
	"ix", ixgbe_methods, sizeof(struct adapter),
};

devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

/* The driver requires both the PCI bus and ethernet layer modules */
MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
227
228 /*
229 ** TUNEABLE PARAMETERS:
230 */
231
232 /*
233 ** AIM: Adaptive Interrupt Moderation
234 ** which means that the interrupt rate
235 ** is varied over time based on the
236 ** traffic for that interrupt vector
237 */
238 static int ixgbe_enable_aim = TRUE;
239 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
240
241 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
242 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
243
244 /* How many packets rxeof tries to clean at a time */
245 static int ixgbe_rx_process_limit = 128;
246 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
247
248 /*
249 ** Smart speed setting, default to on
250 ** this only works as a compile option
251 ** right now as its during attach, set
252 ** this to 'ixgbe_smart_speed_off' to
253 ** disable.
254 */
255 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
256
257 static int ixgbe_msi_enable = 1;
258 TUNABLE_INT("hw.ixgbe.msi.enable", &ixgbe_msi_enable);
259
260 /*
261  * MSIX should be the default for best performance,
262  * but this allows it to be forced off for testing.
263  */
264 static int ixgbe_enable_msix = 1;
265 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
266
267 /*
268  * Header split: this causes the hardware to DMA
269  * the header into a separate mbuf from the payload,
270  * it can be a performance win in some workloads, but
271  * in others it actually hurts, its off by default. 
272  */
273 static int ixgbe_header_split = FALSE;
274 TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);
275
276 /*
277  * Number of Queues, can be set to 0,
278  * it then autoconfigures based on the
279  * number of cpus with a max of 8. This
280  * can be overridden manually here.
281  */
282 static int ixgbe_num_queues = 0;
283 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
284
285 /*
286 ** Number of TX descriptors per ring,
287 ** setting higher than RX as this seems
288 ** the better performing choice.
289 */
290 static int ixgbe_txd = PERFORM_TXD;
291 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
292
293 /* Number of RX descriptors per ring */
294 static int ixgbe_rxd = PERFORM_RXD;
295 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
296
297 /* Keep running tab on them for sanity check */
298 static int ixgbe_total_ports;
299
300 #ifdef IXGBE_FDIR
301 /*
302 ** For Flow Director: this is the
303 ** number of TX packets we sample
304 ** for the filter pool, this means
305 ** every 20th packet will be probed.
306 **
307 ** This feature can be disabled by 
308 ** setting this to 0.
309 */
310 static int atr_sample_rate = 20;
311 /* 
312 ** Flow Director actually 'steals'
313 ** part of the packet buffer as its
314 ** filter pool, this variable controls
315 ** how much it uses:
316 **  0 = 64K, 1 = 128K, 2 = 256K
317 */
318 static int fdir_pballoc = 1;
319 #endif
320
321 #ifdef DEV_NETMAP
322 /*
323  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
324  * be a reference on how to implement netmap support in a driver.
325  * Additional comments are in ixgbe_netmap.h .
326  *
327  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
328  * that extend the standard driver.
329  */
330 #include <dev/netmap/ixgbe_netmap.h>
331 #endif /* DEV_NETMAP */
332
333 /*********************************************************************
334  *  Device identification routine
335  *
336  *  ixgbe_probe determines if the driver should be loaded on
337  *  adapter based on PCI vendor/device id of the adapter.
338  *
339  *  return BUS_PROBE_DEFAULT on success, positive on failure
340  *********************************************************************/
341
342 static int
343 ixgbe_probe(device_t dev)
344 {
345         ixgbe_vendor_info_t *ent;
346
347         u16     pci_vendor_id = 0;
348         u16     pci_device_id = 0;
349         u16     pci_subvendor_id = 0;
350         u16     pci_subdevice_id = 0;
351         char    adapter_name[256];
352
353         INIT_DEBUGOUT("ixgbe_probe: begin");
354
355         pci_vendor_id = pci_get_vendor(dev);
356         if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
357                 return (ENXIO);
358
359         pci_device_id = pci_get_device(dev);
360         pci_subvendor_id = pci_get_subvendor(dev);
361         pci_subdevice_id = pci_get_subdevice(dev);
362
363         ent = ixgbe_vendor_info_array;
364         while (ent->vendor_id != 0) {
365                 if ((pci_vendor_id == ent->vendor_id) &&
366                     (pci_device_id == ent->device_id) &&
367
368                     ((pci_subvendor_id == ent->subvendor_id) ||
369                      (ent->subvendor_id == 0)) &&
370
371                     ((pci_subdevice_id == ent->subdevice_id) ||
372                      (ent->subdevice_id == 0))) {
373                         ksprintf(adapter_name, "%s, Version - %s",
374                                 ixgbe_strings[ent->index],
375                                 ixgbe_driver_version);
376                         device_set_desc_copy(dev, adapter_name);
377                         ++ixgbe_total_ports;
378                         return (BUS_PROBE_DEFAULT);
379                 }
380                 ent++;
381         }
382         return (ENXIO);
383 }
384
385 /*********************************************************************
386  *  Device initialization routine
387  *
388  *  The attach entry point is called when the driver is being loaded.
389  *  This routine identifies the type of hardware, allocates all resources
390  *  and initializes the hardware.
391  *
392  *  return 0 on success, positive on failure
393  *********************************************************************/
394
395 static int
396 ixgbe_attach(device_t dev)
397 {
398         struct adapter *adapter;
399         struct ixgbe_hw *hw;
400         int             error = 0;
401         u16             csum;
402         u32             ctrl_ext;
403
404         INIT_DEBUGOUT("ixgbe_attach: begin");
405
406         if (resource_disabled("ixgbe", device_get_unit(dev))) {
407                 device_printf(dev, "Disabled by device hint\n");
408                 return (ENXIO);
409         }
410
411         /* Allocate, clear, and link in our adapter structure */
412         adapter = device_get_softc(dev);
413         adapter->dev = adapter->osdep.dev = dev;
414         hw = &adapter->hw;
415
416         /* Core Lock Init*/
417         IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
418
419         /* SYSCTL APIs */
420
421         sysctl_ctx_init(&adapter->sysctl_ctx);
422         adapter->sysctl_tree = SYSCTL_ADD_NODE(&adapter->sysctl_ctx,
423             SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO,
424             device_get_nameunit(adapter->dev), CTLFLAG_RD, 0, "");
425         if (adapter->sysctl_tree == NULL) {
426                 device_printf(adapter->dev, "can't add sysctl node\n");
427                 return (EINVAL);
428         }
429         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
430                         SYSCTL_CHILDREN(adapter->sysctl_tree),
431                         OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
432                         adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");
433
434         SYSCTL_ADD_INT(&adapter->sysctl_ctx,
435                         SYSCTL_CHILDREN(adapter->sysctl_tree),
436                         OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
437                         &ixgbe_enable_aim, 1, "Interrupt Moderation");
438
439         /*
440         ** Allow a kind of speed control by forcing the autoneg
441         ** advertised speed list to only a certain value, this
442         ** supports 1G on 82599 devices, and 100Mb on x540.
443         */
444         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
445                         SYSCTL_CHILDREN(adapter->sysctl_tree),
446                         OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
447                         adapter, 0, ixgbe_set_advertise, "I", "Link Speed");
448
449         SYSCTL_ADD_PROC(&adapter->sysctl_ctx,
450                         SYSCTL_CHILDREN(adapter->sysctl_tree),
451                         OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
452                         0, ixgbe_set_thermal_test, "I", "Thermal Test");
453
454         /* Set up the timer callout */
455         callout_init_mp(&adapter->timer);
456
457         /* Determine hardware revision */
458         ixgbe_identify_hardware(adapter);
459
460         /* Enable bus mastering */
461         pci_enable_busmaster(dev);
462
463         /* Do base PCI setup - map BAR0 */
464         if (ixgbe_allocate_pci_resources(adapter)) {
465                 device_printf(dev, "Allocation of PCI resources failed\n");
466                 error = ENXIO;
467                 goto err_out;
468         }
469
470         /* Do descriptor calc and sanity checks */
471         if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
472             ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
473                 device_printf(dev, "TXD config issue, using default!\n");
474                 adapter->num_tx_desc = DEFAULT_TXD;
475         } else
476                 adapter->num_tx_desc = ixgbe_txd;
477
478         /*
479         ** With many RX rings it is easy to exceed the
480         ** system mbuf allocation. Tuning nmbclusters
481         ** can alleviate this.
482         */
483         if (nmbclusters > 0 ) {
484                 int s;
485                 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
486                 if (s > nmbclusters) {
487                         device_printf(dev, "RX Descriptors exceed "
488                             "system mbuf max, using default instead!\n");
489                         ixgbe_rxd = DEFAULT_RXD;
490                 }
491         }
492
493         if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
494             ixgbe_rxd < MIN_TXD || ixgbe_rxd > MAX_TXD) {
495                 device_printf(dev, "RXD config issue, using default!\n");
496                 adapter->num_rx_desc = DEFAULT_RXD;
497         } else
498                 adapter->num_rx_desc = ixgbe_rxd;
499
500         /* Allocate our TX/RX Queues */
501         if (ixgbe_allocate_queues(adapter)) {
502                 error = ENOMEM;
503                 goto err_out;
504         }
505
506         /* Allocate multicast array memory. */
507         adapter->mta = kmalloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
508             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
509         if (adapter->mta == NULL) {
510                 device_printf(dev, "Can not allocate multicast setup array\n");
511                 error = ENOMEM;
512                 goto err_late;
513         }
514
515         /* Initialize the shared code */
516         error = ixgbe_init_shared_code(hw);
517         if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
518                 /*
519                 ** No optics in this port, set up
520                 ** so the timer routine will probe 
521                 ** for later insertion.
522                 */
523                 adapter->sfp_probe = TRUE;
524                 error = 0;
525         } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
526                 device_printf(dev,"Unsupported SFP+ module detected!\n");
527                 error = EIO;
528                 goto err_late;
529         } else if (error) {
530                 device_printf(dev,"Unable to initialize the shared code\n");
531                 error = EIO;
532                 goto err_late;
533         }
534
535         /* Make sure we have a good EEPROM before we read from it */
536         if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
537                 device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
538                 error = EIO;
539                 goto err_late;
540         }
541
542         error = ixgbe_init_hw(hw);
543         switch (error) {
544         case IXGBE_ERR_EEPROM_VERSION:
545                 device_printf(dev, "This device is a pre-production adapter/"
546                     "LOM.  Please be aware there may be issues associated "
547                     "with your hardware.\n If you are experiencing problems "
548                     "please contact your Intel or hardware representative "
549                     "who provided you with this hardware.\n");
550                 break;
551         case IXGBE_ERR_SFP_NOT_SUPPORTED:
552                 device_printf(dev,"Unsupported SFP+ Module\n");
553                 error = EIO;
554                 device_printf(dev,"Hardware Initialization Failure\n");
555                 goto err_late;
556         case IXGBE_ERR_SFP_NOT_PRESENT:
557                 device_printf(dev,"No SFP+ Module found\n");
558                 /* falls thru */
559         default:
560                 break;
561         }
562
563         /* Detect and set physical type */
564         ixgbe_setup_optics(adapter);
565
566         if ((adapter->msix > 1) && (ixgbe_enable_msix))
567                 error = ixgbe_allocate_msix(adapter); 
568         else
569                 error = ixgbe_allocate_legacy(adapter); 
570         if (error) 
571                 goto err_late;
572
573         /* Setup OS specific network interface */
574         if (ixgbe_setup_interface(dev, adapter) != 0)
575                 goto err_late;
576
577         /* Sysctl for limiting the amount of work done in the taskqueue */
578         ixgbe_add_rx_process_limit(adapter, "rx_processing_limit",
579             "max number of rx packets to process", &adapter->rx_process_limit,
580             ixgbe_rx_process_limit);
581
582         /* Initialize statistics */
583         ixgbe_update_stats_counters(adapter);
584
585         /* Register for VLAN events */
586         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
587             ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
588         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
589             ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
590
591         /* Print PCIE bus type/speed/width info */
592         ixgbe_get_bus_info(hw);
593         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
594             ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
595             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
596             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
597             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
598             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
599             ("Unknown"));
600
601         if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
602             (hw->bus.speed == ixgbe_bus_speed_2500)) {
603                 device_printf(dev, "PCI-Express bandwidth available"
604                     " for this card\n     is not sufficient for"
605                     " optimal performance.\n");
606                 device_printf(dev, "For optimal performance a x8 "
607                     "PCIE, or x4 PCIE 2 slot is required.\n");
608         }
609
610         /* let hardware know driver is loaded */
611         ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
612         ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
613         IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
614
615         ixgbe_add_hw_stats(adapter);
616
617 #ifdef DEV_NETMAP
618         ixgbe_netmap_attach(adapter);
619 #endif /* DEV_NETMAP */
620         INIT_DEBUGOUT("ixgbe_attach: end");
621         return (0);
622 err_late:
623         ixgbe_free_transmit_structures(adapter);
624         ixgbe_free_receive_structures(adapter);
625 err_out:
626         if (adapter->ifp != NULL)
627                 if_free(adapter->ifp);
628         ixgbe_free_pci_resources(adapter);
629         kfree(adapter->mta, M_DEVBUF);
630         return (error);
631
632 }
633
634 /*********************************************************************
635  *  Device removal routine
636  *
637  *  The detach entry point is called when the driver is being removed.
638  *  This routine stops the adapter and deallocates all the resources
639  *  that were allocated for driver operation.
640  *
641  *  return 0 on success, positive on failure
642  *********************************************************************/
643
static int
ixgbe_detach(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ix_queue *que = adapter->queues;
	u32	ctrl_ext;

	INIT_DEBUGOUT("ixgbe_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunks != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	/* Bring the hardware down before tearing software state apart. */
	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);

	/* Wait out and destroy each per-queue deferred-work taskqueue. */
	for (int i = 0; i < adapter->num_queues; i++, que++) {
		if (que->tq) {
			taskqueue_drain(que->tq, &que->que_task);
			taskqueue_free(que->tq);
		}
	}

	/* Drain the Link queue (link/module/SFP tasks share one queue) */
	if (adapter->tq) {
		taskqueue_drain(adapter->tq, &adapter->link_task);
		taskqueue_drain(adapter->tq, &adapter->mod_task);
		taskqueue_drain(adapter->tq, &adapter->msf_task);
#ifdef IXGBE_FDIR
		taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
		taskqueue_free(adapter->tq);
	}

	/* let hardware know driver is unloading */
	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	/* Detach from the network stack and stop the watchdog timer. */
	ether_ifdetach(adapter->ifp);
	callout_stop(&adapter->timer);
#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
	ixgbe_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(adapter->ifp);

	/* Finally release ring memory, the multicast array and sysctls. */
	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
	kfree(adapter->mta, M_DEVBUF);
	sysctl_ctx_free(&adapter->sysctl_ctx);

	IXGBE_CORE_LOCK_DESTROY(adapter);
	return (0);
}
709
710 /*********************************************************************
711  *
712  *  Shutdown entry point
713  *
714  **********************************************************************/
715
716 static int
717 ixgbe_shutdown(device_t dev)
718 {
719         struct adapter *adapter = device_get_softc(dev);
720         IXGBE_CORE_LOCK(adapter);
721         ixgbe_stop(adapter);
722         IXGBE_CORE_UNLOCK(adapter);
723         return (0);
724 }
725
726
727 /*********************************************************************
728  *  Transmit entry point
729  *
730  *  ixgbe_start is called by the stack to initiate a transmit.
731  *  The driver will remain in this routine as long as there are
732  *  packets to transmit and transmit resources are available.
733  *  In case resources are not available stack is notified and
734  *  the packet is requeued.
735  **********************************************************************/
736
static void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
{
	struct mbuf    *m_head;
	struct adapter *adapter = txr->adapter;

	/* Caller must hold the TX ring lock. */
	IXGBE_TX_LOCK_ASSERT(txr);

	/* Only transmit while running, not flow-blocked, and link is up. */
	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Drain the send queue while free descriptors remain. */
	while (!ifq_is_empty(&ifp->if_snd)) {
		/* Low on descriptors: mark depleted and stop dequeuing. */
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) {
			txr->queue_status |= IXGBE_QUEUE_DEPLETED;
			break;
		}

		m_head = ifq_dequeue(&ifp->if_snd, NULL);
		if (m_head == NULL)
			break;

		/*
		 * NOTE(review): on ixgbe_xmit() failure the mbuf is not
		 * requeued here (requeue path is compiled out); whether
		 * it is freed depends on ixgbe_xmit() -- confirm.
		 */
		if (ixgbe_xmit(txr, &m_head)) {
#if 0 /* XXX: prepend to an ALTQ queue ? */
			if (m_head != NULL)
				IF_PREPEND(&ifp->if_snd, m_head);
#endif
			if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
				txr->queue_status |= IXGBE_QUEUE_DEPLETED;
			break;
		}
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		/*
		 * NOTE(review): plain assignment clears any previously
		 * set DEPLETED flag -- presumably intentional since the
		 * ring just made progress; verify against txeof handling.
		 */
		txr->queue_status = IXGBE_QUEUE_WORKING;

	}
	return;
}
779
780 /*
781  * Legacy TX start - called by the stack, this
782  * always uses the first tx ring, and should
783  * not be used with multiqueue tx enabled.
784  */
785 static void
786 ixgbe_start(struct ifnet *ifp)
787 {
788         struct adapter *adapter = ifp->if_softc;
789         struct tx_ring  *txr = adapter->tx_rings;
790
791         if (ifp->if_flags & IFF_RUNNING) {
792                 IXGBE_TX_LOCK(txr);
793                 ixgbe_start_locked(txr, ifp);
794                 IXGBE_TX_UNLOCK(txr);
795         }
796         return;
797 }
798
#if 0 /* __FreeBSD_version >= 800000 */
/*
 * NOTE(review): this whole region is compiled out on DragonFly (the
 * buf_ring/drbr multiqueue API is FreeBSD-specific).  Kept verbatim
 * for reference against the upstream FreeBSD driver.
 */
/*
** Multiqueue Transmit driver
**
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ix_queue *que;
        struct tx_ring  *txr;
        int             i = 0, err = 0;

        /* Which queue to use: flow id if present, else current cpu */
        if ((m->m_flags & M_FLOWID) != 0)
                i = m->m_pkthdr.flowid % adapter->num_queues;
        else
                i = curcpu % adapter->num_queues;

        txr = &adapter->tx_rings[i];
        que = &adapter->queues[i];

        /* Transmit inline when the ring lock is uncontended,
         * otherwise enqueue and let the queue task drain it. */
        if (((txr->queue_status & IXGBE_QUEUE_DEPLETED) == 0) &&
            IXGBE_TX_TRYLOCK(txr)) {
                err = ixgbe_mq_start_locked(ifp, txr, m);
                IXGBE_TX_UNLOCK(txr);
        } else {
                err = drbr_enqueue(ifp, txr->br, m);
                taskqueue_enqueue(que->tq, &que->que_task);
        }

        return (err);
}

static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
        struct adapter  *adapter = txr->adapter;
        struct mbuf     *next;
        int             enqueued, err = 0;

        /* Down, depleted, or no link: just buffer the frame */
        if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
            (txr->queue_status == IXGBE_QUEUE_DEPLETED) ||
            adapter->link_active == 0) {
                if (m != NULL)
                        err = drbr_enqueue(ifp, txr->br, m);
                return (err);
        }

        enqueued = 0;
        if (m == NULL) {
                next = drbr_dequeue(ifp, txr->br);
        } else if (drbr_needs_enqueue(ifp, txr->br)) {
                if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
                        return (err);
                next = drbr_dequeue(ifp, txr->br);
        } else
                next = m;

        /* Process the queue */
        while (next != NULL) {
                if ((err = ixgbe_xmit(txr, &next)) != 0) {
                        if (next != NULL)
                                err = drbr_enqueue(ifp, txr->br, next);
                        break;
                }
                enqueued++;
                drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
                /* Send a copy of the frame to the BPF listener */
                ETHER_BPF_MTAP(ifp, next);
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        break;
                if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
                        ixgbe_txeof(txr);
                if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD) {
                        txr->queue_status |= IXGBE_QUEUE_DEPLETED;
                        break;
                }
                next = drbr_dequeue(ifp, txr->br);
        }

        if (enqueued > 0) {
                /* Set watchdog on */
                txr->queue_status |= IXGBE_QUEUE_WORKING;
                txr->watchdog_time = ticks;
        }

        if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
                ixgbe_txeof(txr);

        return (err);
}

/*
** Flush all ring buffers
*/
static void
ixgbe_qflush(struct ifnet *ifp)
{
        struct adapter  *adapter = ifp->if_softc;
        struct tx_ring  *txr = adapter->tx_rings;
        struct mbuf     *m;

        /* Free every buffered mbuf on every tx ring, then the ifq */
        for (int i = 0; i < adapter->num_queues; i++, txr++) {
                IXGBE_TX_LOCK(txr);
                while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
                        m_freem(m);
                IXGBE_TX_UNLOCK(txr);
        }
        if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */
911
912 /*********************************************************************
913  *  Ioctl entry point
914  *
915  *  ixgbe_ioctl is called when the user wants to configure the
916  *  interface.
917  *
918  *  return 0 on success, positive on failure
919  **********************************************************************/
920
static int
ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
{
        struct adapter  *adapter = ifp->if_softc;
        struct ifreq    *ifr = (struct ifreq *) data;
#if defined(INET) || defined(INET6)
        struct ifaddr *ifa = (struct ifaddr *)data;
        bool            avoid_reset = FALSE;
#endif
        int             error = 0;

        /* 'cr' is unused here; presumably required by the DragonFly
         * ifnet ioctl signature — TODO confirm. */
        switch (command) {

        case SIOCSIFADDR:
#ifdef INET
                if (ifa->ifa_addr->sa_family == AF_INET)
                        avoid_reset = TRUE;
#endif
#ifdef INET6
                if (ifa->ifa_addr->sa_family == AF_INET6)
                        avoid_reset = TRUE;
#endif
#if defined(INET) || defined(INET6)
                /*
                ** Calling init results in link renegotiation,
                ** so we avoid doing it when possible.
                */
                if (avoid_reset) {
                        ifp->if_flags |= IFF_UP;
                        if (!(ifp->if_flags & IFF_RUNNING))
                                ixgbe_init(adapter);
                        if (!(ifp->if_flags & IFF_NOARP))
                                arp_ifinit(ifp, ifa);
                } else
                        error = ether_ioctl(ifp, command, data);
#endif
                break;
        case SIOCSIFMTU:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
                /* NOTE(review): upper bound subtracts only the Ethernet
                 * header, not the CRC, while max_frame_size below adds
                 * both — confirm against IXGBE_MAX_FRAME_SIZE. */
                if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
                        error = EINVAL;
                } else {
                        IXGBE_CORE_LOCK(adapter);
                        ifp->if_mtu = ifr->ifr_mtu;
                        adapter->max_frame_size =
                                ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
                        /* Reinit to resize rx buffers for the new MTU */
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFFLAGS:
                IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
                IXGBE_CORE_LOCK(adapter);
                if (ifp->if_flags & IFF_UP) {
                        if ((ifp->if_flags & IFF_RUNNING)) {
                                /* Only PROMISC/ALLMULTI changed: update
                                 * the filter without a full reinit */
                                if ((ifp->if_flags ^ adapter->if_flags) &
                                    (IFF_PROMISC | IFF_ALLMULTI)) {
                                        ixgbe_set_promisc(adapter);
                                }
                        } else
                                ixgbe_init_locked(adapter);
                } else
                        if (ifp->if_flags & IFF_RUNNING)
                                ixgbe_stop(adapter);
                /* Remember flags so the next call can diff against them */
                adapter->if_flags = ifp->if_flags;
                IXGBE_CORE_UNLOCK(adapter);
                break;
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
                if (ifp->if_flags & IFF_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        /* Quiesce interrupts while the table is rewritten */
                        ixgbe_disable_intr(adapter);
                        ixgbe_set_multi(adapter);
                        ixgbe_enable_intr(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
                break;
        case SIOCSIFMEDIA:
        case SIOCGIFMEDIA:
                IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
                error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
                break;
        case SIOCSIFCAP:
        {
                /* Toggle only the capability bits that changed */
                int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
                IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
                if (mask & IFCAP_HWCSUM)
                        ifp->if_capenable ^= IFCAP_HWCSUM;
                if (mask & IFCAP_TSO4)
                        ifp->if_capenable ^= IFCAP_TSO4;
                if (mask & IFCAP_TSO6)
                        ifp->if_capenable ^= IFCAP_TSO6;
#if 0 /* NET_LRO */
                if (mask & IFCAP_LRO)
                        ifp->if_capenable ^= IFCAP_LRO;
#endif
                if (mask & IFCAP_VLAN_HWTAGGING)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
                if (mask & IFCAP_VLAN_HWFILTER)
                        ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
#if 0 /* NET_TSO */
                if (mask & IFCAP_VLAN_HWTSO)
                        ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
#endif
                /* Reinit so ixgbe_init_locked reprograms offloads */
                if (ifp->if_flags & IFF_RUNNING) {
                        IXGBE_CORE_LOCK(adapter);
                        ixgbe_init_locked(adapter);
                        IXGBE_CORE_UNLOCK(adapter);
                }
#if 0
                VLAN_CAPABILITIES(ifp);
#endif
                break;
        }

        default:
                IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
                error = ether_ioctl(ifp, command, data);
                break;
        }

        return (error);
}
1045
1046 /*********************************************************************
1047  *  Init entry point
1048  *
1049  *  This routine is used in two ways. It is used by the stack as
1050  *  init entry point in network interface structure. It is also used
1051  *  by the driver as a hw/sw initialization routine to get to a
1052  *  consistent state.
1053  *
1054  *  return 0 on success, positive on failure
1055  **********************************************************************/
1056 #define IXGBE_MHADD_MFS_SHIFT 16
1057
static void
ixgbe_init_locked(struct adapter *adapter)
{
        struct ifnet   *ifp = adapter->ifp;
        device_t        dev = adapter->dev;
        struct ixgbe_hw *hw = &adapter->hw;
        u32             k, txdctl, mhadd, gpie;
        u32             rxdctl, rxctrl;

        /* Caller must hold the core lock */
        KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
        INIT_DEBUGOUT("ixgbe_init: begin");
        hw->adapter_stopped = FALSE;
        ixgbe_stop_adapter(hw);
        callout_stop(&adapter->timer);

        /* reprogram the RAR[0] in case user changed it. */
        ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

        /* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
              IXGBE_ETH_LENGTH_OF_ADDRESS);
        ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
        hw->addr_ctrl.rar_used_count = 1;

        /* Set the various hardware offload abilities */
        ifp->if_hwassist = 0;
        if (ifp->if_capenable & IFCAP_TSO)
                ifp->if_hwassist |= CSUM_TSO;
        if (ifp->if_capenable & IFCAP_TXCSUM) {
                ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if 0
                if (hw->mac.type != ixgbe_mac_82598EB)
                        ifp->if_hwassist |= CSUM_SCTP;
#endif
        }

        /* Prepare transmit descriptors and buffers */
        if (ixgbe_setup_transmit_structures(adapter)) {
                device_printf(dev,"Could not setup transmit structures\n");
                ixgbe_stop(adapter);
                return;
        }

        ixgbe_init_hw(hw);
        ixgbe_initialize_transmit_units(adapter);

        /* Setup Multicast table */
        ixgbe_set_multi(adapter);

        /*
        ** Determine the correct mbuf pool
        ** for doing jumbo/headersplit
        */
        if (adapter->max_frame_size <= 2048)
                adapter->rx_mbuf_sz = MCLBYTES;
        else if (adapter->max_frame_size <= 4096)
                adapter->rx_mbuf_sz = MJUMPAGESIZE;
        else if (adapter->max_frame_size <= 9216)
                adapter->rx_mbuf_sz = MJUM9BYTES;
        else
                adapter->rx_mbuf_sz = MJUM16BYTES;

        /* Prepare receive descriptors and buffers */
        if (ixgbe_setup_receive_structures(adapter)) {
                device_printf(dev,"Could not setup receive structures\n");
                ixgbe_stop(adapter);
                return;
        }

        /* Configure RX settings */
        ixgbe_initialize_receive_units(adapter);

        gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

        /* Enable Fan Failure Interrupt */
        gpie |= IXGBE_SDP1_GPIEN;

        /* Add for Module detection */
        if (hw->mac.type == ixgbe_mac_82599EB)
                gpie |= IXGBE_SDP2_GPIEN;

        /* Thermal Failure Detection */
        if (hw->mac.type == ixgbe_mac_X540)
                gpie |= IXGBE_SDP0_GPIEN;

        if (adapter->msix > 1) {
                /* Enable Enhanced MSIX mode */
                gpie |= IXGBE_GPIE_MSIX_MODE;
                gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
                    IXGBE_GPIE_OCD;
        }
        IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

        /* Set MTU size (only programmed for jumbo frames) */
        if (ifp->if_mtu > ETHERMTU) {
                mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
                mhadd &= ~IXGBE_MHADD_MFS_MASK;
                mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
                IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
        }

        /* Now enable all the queues */

        for (int i = 0; i < adapter->num_queues; i++) {
                txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
                txdctl |= IXGBE_TXDCTL_ENABLE;
                /* Set WTHRESH to 8, burst writeback */
                txdctl |= (8 << 16);
                /*
                 * When the internal queue falls below PTHRESH (32),
                 * start prefetching as long as there are at least
                 * HTHRESH (1) buffers ready. The values are taken
                 * from the Intel linux driver 3.8.21.
                 * Prefetching enables tx line rate even with 1 queue.
                 */
                txdctl |= (32 << 0) | (1 << 8);
                IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
        }

        for (int i = 0; i < adapter->num_queues; i++) {
                rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
                if (hw->mac.type == ixgbe_mac_82598EB) {
                        /*
                        ** PTHRESH = 21
                        ** HTHRESH = 4
                        ** WTHRESH = 8
                        */
                        rxdctl &= ~0x3FFFFF;
                        rxdctl |= 0x080420;
                }
                rxdctl |= IXGBE_RXDCTL_ENABLE;
                IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
                /* Poll up to ~10ms for the queue-enable to latch */
                for (k = 0; k < 10; k++) {
                        if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
                            IXGBE_RXDCTL_ENABLE)
                                break;
                        else
                                msec_delay(1);
                }
                wmb();
#ifdef DEV_NETMAP
                /*
                 * In netmap mode, we must preserve the buffers made
                 * available to userspace before the if_init()
                 * (this is true by default on the TX side, because
                 * init makes all buffers available to userspace).
                 *
                 * netmap_reset() and the device specific routines
                 * (e.g. ixgbe_setup_receive_rings()) map these
                 * buffers at the end of the NIC ring, so here we
                 * must set the RDT (tail) register to make sure
                 * they are not overwritten.
                 *
                 * In this driver the NIC ring starts at RDH = 0,
                 * RDT points to the last slot available for reception (?),
                 * so RDT = num_rx_desc - 1 means the whole ring is available.
                 */
                if (ifp->if_capenable & IFCAP_NETMAP) {
                        struct netmap_adapter *na = NA(adapter->ifp);
                        struct netmap_kring *kring = &na->rx_rings[i];
                        int t = na->num_rx_desc - 1 - kring->nr_hwavail;

                        IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
                } else
#endif /* DEV_NETMAP */
                IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
        }

        /* Set up VLAN support and filter */
        ixgbe_setup_vlan_hw_support(adapter);

        /* Enable Receive engine */
        rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
        if (hw->mac.type == ixgbe_mac_82598EB)
                rxctrl |= IXGBE_RXCTRL_DMBYPS;
        rxctrl |= IXGBE_RXCTRL_RXEN;
        ixgbe_enable_rx_dma(hw, rxctrl);

        /* Restart the watchdog/stats timer (stopped above) */
        callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);

        /* Set up MSI/X routing */
        if (ixgbe_enable_msix)  {
                ixgbe_configure_ivars(adapter);
                /* Set up auto-mask */
                if (hw->mac.type == ixgbe_mac_82598EB)
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
                else {
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
                        IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
                }
        } else {  /* Simple settings for Legacy/MSI */
                ixgbe_set_ivar(adapter, 0, 0, 0);
                ixgbe_set_ivar(adapter, 0, 0, 1);
                IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
        }

#ifdef IXGBE_FDIR
        /* Init Flow director */
        if (hw->mac.type != ixgbe_mac_82598EB) {
                u32 hdrm = 32 << fdir_pballoc;

                hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
                ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
        }
#endif

        /*
        ** Check on any SFP devices that
        ** need to be kick-started
        */
        if (hw->phy.type == ixgbe_phy_none) {
                int err = hw->phy.ops.identify(hw);
                if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
                        /* NOTE(review): bails out before enabling
                         * interrupts or setting IFF_RUNNING */
                        device_printf(dev,
                            "Unsupported SFP+ module type was detected.\n");
                        return;
                }
        }

        /* Set moderation on the Link interrupt */
        IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);

        /* Config/Enable Link */
        ixgbe_config_link(adapter);

        /* Hardware Packet Buffer & Flow Control setup */
        {
                u32 rxpb, frame, size, tmp;

                frame = adapter->max_frame_size;

                /* Calculate High Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_DV_X540(frame, frame);
                else
                        tmp = IXGBE_DV(frame, frame);
                size = IXGBE_BT2KB(tmp);
                rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
                hw->fc.high_water[0] = rxpb - size;

                /* Now calculate Low Water */
                if (hw->mac.type == ixgbe_mac_X540)
                        tmp = IXGBE_LOW_DV_X540(frame);
                else
                        tmp = IXGBE_LOW_DV(frame);
                hw->fc.low_water[0] = IXGBE_BT2KB(tmp);

                adapter->fc = hw->fc.requested_mode = ixgbe_fc_full;
                hw->fc.pause_time = IXGBE_FC_PAUSE;
                hw->fc.send_xon = TRUE;
        }
        /* Initialize the FC settings */
        ixgbe_start_hw(hw);

        /* And now turn on interrupts */
        ixgbe_enable_intr(adapter);

        /* Now inform the stack we're ready */
        ifp->if_flags |= IFF_RUNNING;
        ifp->if_flags &= ~IFF_OACTIVE;

        return;
}
1321
/*
 * Stack-facing init entry point: wraps the locked
 * initializer with the core lock.
 */
static void
ixgbe_init(void *arg)
{
        struct adapter *adapter = arg;

        IXGBE_CORE_LOCK(adapter);
        ixgbe_init_locked(adapter);
        IXGBE_CORE_UNLOCK(adapter);
}
1332
1333
1334 /*
1335 **
1336 ** MSIX Interrupt Handlers and Tasklets
1337 **
1338 */
1339
1340 static inline void
1341 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1342 {
1343         struct ixgbe_hw *hw = &adapter->hw;
1344         u64     queue = (u64)(1 << vector);
1345         u32     mask;
1346
1347         if (hw->mac.type == ixgbe_mac_82598EB) {
1348                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1349                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1350         } else {
1351                 mask = (queue & 0xFFFFFFFF);
1352                 if (mask)
1353                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1354                 mask = (queue >> 32);
1355                 if (mask)
1356                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1357         }
1358 }
1359
1360 static inline void
1361 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1362 {
1363         struct ixgbe_hw *hw = &adapter->hw;
1364         u64     queue = (u64)(1 << vector);
1365         u32     mask;
1366
1367         if (hw->mac.type == ixgbe_mac_82598EB) {
1368                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1369                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1370         } else {
1371                 mask = (queue & 0xFFFFFFFF);
1372                 if (mask)
1373                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1374                 mask = (queue >> 32);
1375                 if (mask)
1376                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1377         }
1378 }
1379
1380 static inline void
1381 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1382 {
1383         u32 mask;
1384
1385         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1386                 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1387                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1388         } else {
1389                 mask = (queues & 0xFFFFFFFF);
1390                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1391                 mask = (queues >> 32);
1392                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1393         }
1394 }
1395
1396
/*
 * Taskqueue handler for a queue: drain rx, reap tx, restart the
 * transmit path, and either reschedule itself (more rx pending)
 * or re-enable the queue's interrupt.
 */
static void
ixgbe_handle_que(void *context, int pending)
{
        struct ix_queue *que = context;
        struct adapter  *adapter = que->adapter;
        struct tx_ring  *txr = que->txr;
        struct ifnet    *ifp = adapter->ifp;
        bool            more;

        if (ifp->if_flags & IFF_RUNNING) {
                /* 'more' is true when the rx limit was hit */
                more = ixgbe_rxeof(que, adapter->rx_process_limit);
                IXGBE_TX_LOCK(txr);
                ixgbe_txeof(txr);
#if 0 /*__FreeBSD_version >= 800000*/
                if (!drbr_empty(ifp, txr->br))
                        ixgbe_mq_start_locked(ifp, txr, NULL);
#else
                if (!ifq_is_empty(&ifp->if_snd))
                        ixgbe_start_locked(txr, ifp);
#endif
                IXGBE_TX_UNLOCK(txr);
                if (more) {
                        /* More rx work: run again, leave irq masked */
                        taskqueue_enqueue(que->tq, &que->que_task);
                        return;
                }
        }

        /* Reenable this interrupt */
        ixgbe_enable_queue(adapter, que->msix);
        return;
}
1428
1429
1430 /*********************************************************************
1431  *
1432  *  Legacy Interrupt Service routine
1433  *
1434  **********************************************************************/
1435
static void
ixgbe_legacy_irq(void *arg)
{
        struct ix_queue *que = arg;
        struct adapter  *adapter = que->adapter;
        struct ixgbe_hw *hw = &adapter->hw;
        struct          tx_ring *txr = adapter->tx_rings;
        bool            more_tx, more_rx;
        u32             reg_eicr, loop = MAX_LOOP;


        /* Reading EICR fetches (and clears) the cause bits */
        reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);

        ++que->irqs;
        if (reg_eicr == 0) {
                /* Not our interrupt; just unmask and leave */
                ixgbe_enable_intr(adapter);
                return;
        }

        more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);

        /* Reap completed tx descriptors, bounded by MAX_LOOP passes */
        IXGBE_TX_LOCK(txr);
        do {
                more_tx = ixgbe_txeof(txr);
        } while (loop-- && more_tx);
        IXGBE_TX_UNLOCK(txr);

        /* Leftover work is deferred to the queue task */
        if (more_rx || more_tx)
                taskqueue_enqueue(que->tq, &que->que_task);

        /* Check for fan failure */
        if ((hw->phy.media_type == ixgbe_media_type_copper) &&
            (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
                device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
                    "REPLACE IMMEDIATELY!!\n");
                IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
        }

        /* Link status change */
        if (reg_eicr & IXGBE_EICR_LSC)
                taskqueue_enqueue(adapter->tq, &adapter->link_task);

        ixgbe_enable_intr(adapter);
        return;
}
1481
1482
1483 /*********************************************************************
1484  *
1485  *  MSIX Queue Interrupt Service routine
1486  *
1487  **********************************************************************/
void
ixgbe_msix_que(void *arg)
{
        struct ix_queue *que = arg;
        struct adapter  *adapter = que->adapter;
        struct tx_ring  *txr = que->txr;
        struct rx_ring  *rxr = que->rxr;
        bool            more_tx, more_rx;
        u32             newitr = 0;

        /* Mask this vector until processing (or the task) completes */
        ixgbe_disable_queue(adapter, que->msix);
        ++que->irqs;

        more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);

        IXGBE_TX_LOCK(txr);
        more_tx = ixgbe_txeof(txr);
        /*
        ** Make certain that if the stack 
        ** has anything queued the task gets
        ** scheduled to handle it.
        */
#if 0
#if __FreeBSD_version < 800000
        if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
#else
        if (!drbr_empty(adapter->ifp, txr->br))
#endif
#endif
        if (!ifq_is_empty(&adapter->ifp->if_snd))
                more_tx = 1;
        IXGBE_TX_UNLOCK(txr);

        /* Do AIM now? */

        if (ixgbe_enable_aim == FALSE)
                goto no_calc;
        /*
        ** Do Adaptive Interrupt Moderation:
        **  - Write out last calculated setting
        **  - Calculate based on average size over
        **    the last interval.
        */
        if (que->eitr_setting)
                IXGBE_WRITE_REG(&adapter->hw,
                    IXGBE_EITR(que->msix), que->eitr_setting);
 
        que->eitr_setting = 0;

        /* Idle, do nothing */
        if ((txr->bytes == 0) && (rxr->bytes == 0))
                goto no_calc;

        /* newitr = average frame size over the last interval */
        if ((txr->bytes) && (txr->packets))
                newitr = txr->bytes/txr->packets;
        if ((rxr->bytes) && (rxr->packets))
                newitr = max(newitr,
                    (rxr->bytes / rxr->packets));
        newitr += 24; /* account for hardware frame, crc */

        /* set an upper boundary */
        newitr = min(newitr, 3000);

        /* Be nice to the mid range */
        if ((newitr > 300) && (newitr < 1200))
                newitr = (newitr / 3);
        else
                newitr = (newitr / 2);

        /* 82598 packs the interval in both halves; newer macs need
         * the counter write-disable bit set alongside it */
        if (adapter->hw.mac.type == ixgbe_mac_82598EB)
                newitr |= newitr << 16;
        else
                newitr |= IXGBE_EITR_CNT_WDIS;

        /* save for next interrupt */
        que->eitr_setting = newitr;

        /* Reset state */
        txr->bytes = 0;
        txr->packets = 0;
        rxr->bytes = 0;
        rxr->packets = 0;

no_calc:
        if (more_tx || more_rx)
                taskqueue_enqueue(que->tq, &que->que_task);
        else /* Reenable this interrupt */
                ixgbe_enable_queue(adapter, que->msix);
        return;
}
1578
1579
/*
 * MSI-X "link/other cause" interrupt handler.
 *
 * Reads the pending cause bits, acknowledges them, and dispatches
 * link-state, flow-director, ECC, SFP module/multispeed-fiber,
 * fan-failure and over-temp events to their deferred tasks, then
 * re-arms the "other" interrupt source.
 */
static void
ixgbe_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	struct ixgbe_hw *hw = &adapter->hw;
	u32		reg_eicr;

	++adapter->link_irq;

	/* First get the cause */
	/*
	 * NOTE(review): the cause is read from EICS rather than EICR —
	 * reading EICS is presumed to return the EICR contents on these
	 * MACs; confirm against the 82599/X540 datasheet.
	 */
	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
	/* Clear interrupt with write */
	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);

	/* Link status change */
	if (reg_eicr & IXGBE_EICR_LSC)
		taskqueue_enqueue(adapter->tq, &adapter->link_task);

	/* 82599/X540 only: FDIR, ECC and SFP GPI causes */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
#ifdef IXGBE_FDIR
		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
			/* This is probably overkill :) */
			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
				return;
			/* Disable the interrupt */
			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
			taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
		} else
#endif
		if (reg_eicr & IXGBE_EICR_ECC) {
			device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
			    "Please Reboot!!\n");
			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
		} else

		if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
			/* Clear the interrupt */
			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
			taskqueue_enqueue(adapter->tq, &adapter->msf_task);
		} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
			/* Clear the interrupt */
			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
			taskqueue_enqueue(adapter->tq, &adapter->mod_task);
		}
	}

	/* Check for fan failure (82598AT routes it via GPI SDP1) */
	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
		device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
		    "REPLACE IMMEDIATELY!!\n");
		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
	}

	/* Check for over temp condition (X540 only) */
	if ((hw->mac.type == ixgbe_mac_X540) &&
	    (reg_eicr & IXGBE_EICR_GPI_SDP0)) {
		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
		    "PHY IS SHUT DOWN!!\n");
		device_printf(adapter->dev, "System shutdown required\n");
		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0);
	}

	/* Re-arm the "other" (link) interrupt source */
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
	return;
}
1646
1647 /*********************************************************************
1648  *
1649  *  Media Ioctl callback
1650  *
1651  *  This routine is called whenever the user queries the status of
1652  *  the interface using ifconfig.
1653  *
1654  **********************************************************************/
1655 static void
1656 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1657 {
1658         struct adapter *adapter = ifp->if_softc;
1659
1660         INIT_DEBUGOUT("ixgbe_media_status: begin");
1661         IXGBE_CORE_LOCK(adapter);
1662         ixgbe_update_link_status(adapter);
1663
1664         ifmr->ifm_status = IFM_AVALID;
1665         ifmr->ifm_active = IFM_ETHER;
1666
1667         if (!adapter->link_active) {
1668                 IXGBE_CORE_UNLOCK(adapter);
1669                 return;
1670         }
1671
1672         ifmr->ifm_status |= IFM_ACTIVE;
1673
1674         switch (adapter->link_speed) {
1675                 case IXGBE_LINK_SPEED_100_FULL:
1676                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1677                         break;
1678                 case IXGBE_LINK_SPEED_1GB_FULL:
1679                         ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1680                         break;
1681                 case IXGBE_LINK_SPEED_10GB_FULL:
1682                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1683                         break;
1684         }
1685
1686         IXGBE_CORE_UNLOCK(adapter);
1687
1688         return;
1689 }
1690
1691 /*********************************************************************
1692  *
1693  *  Media Ioctl callback
1694  *
1695  *  This routine is called when the user changes speed/duplex using
1696  *  media/mediopt option with ifconfig.
1697  *
1698  **********************************************************************/
1699 static int
1700 ixgbe_media_change(struct ifnet * ifp)
1701 {
1702         struct adapter *adapter = ifp->if_softc;
1703         struct ifmedia *ifm = &adapter->media;
1704
1705         INIT_DEBUGOUT("ixgbe_media_change: begin");
1706
1707         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1708                 return (EINVAL);
1709
1710         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1711         case IFM_AUTO:
1712                 adapter->hw.phy.autoneg_advertised =
1713                     IXGBE_LINK_SPEED_100_FULL |
1714                     IXGBE_LINK_SPEED_1GB_FULL |
1715                     IXGBE_LINK_SPEED_10GB_FULL;
1716                 break;
1717         default:
1718                 device_printf(adapter->dev, "Only auto media type\n");
1719                 return (EINVAL);
1720         }
1721
1722         return (0);
1723 }
1724
1725 /*********************************************************************
1726  *
1727  *  This routine maps the mbufs to tx descriptors, allowing the
1728  *  TX engine to transmit the packets. 
1729  *      - return 0 on success, positive on failure
1730  *
1731  **********************************************************************/
1732
1733 static int
1734 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1735 {
1736         struct adapter  *adapter = txr->adapter;
1737         u32             olinfo_status = 0, cmd_type_len;
1738         u32             paylen = 0;
1739         int             i, j, error, nsegs, maxsegs;
1740         int             first, last = 0;
1741         struct mbuf     *m_head;
1742         bus_dma_segment_t segs[adapter->num_segs];
1743         bus_dmamap_t    map;
1744         struct ixgbe_tx_buf *txbuf;
1745         union ixgbe_adv_tx_desc *txd = NULL;
1746
1747         m_head = *m_headp;
1748
1749         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1750                 error = ixgbe_tso_pullup(txr, m_headp);
1751                 if (error)
1752                         return error;
1753                 m_head = *m_headp;
1754         }
1755
1756         /* Basic descriptor defines */
1757         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1758             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1759
1760         if (m_head->m_flags & M_VLANTAG)
1761                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1762
1763         /*
1764          * Important to capture the first descriptor
1765          * used because it will contain the index of
1766          * the one we tell the hardware to report back
1767          */
1768         first = txr->next_avail_desc;
1769         txbuf = &txr->tx_buffers[first];
1770         map = txbuf->map;
1771
1772         /*
1773          * Map the packet for DMA.
1774          */
1775         maxsegs = txr->tx_avail - IXGBE_TX_RESERVED;
1776         if (maxsegs > adapter->num_segs)
1777                 maxsegs = adapter->num_segs;
1778
1779         error = bus_dmamap_load_mbuf_defrag(txr->txtag, map, m_headp,
1780             segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1781         if (error) {
1782                 if (error == ENOBUFS)
1783                         adapter->mbuf_defrag_failed++;
1784                 else
1785                         adapter->no_tx_dma_setup++;
1786
1787                 m_freem(*m_headp);
1788                 *m_headp = NULL;
1789                 return (error);
1790         }
1791
1792         /* Make certain there are enough descriptors */
1793         if (nsegs > txr->tx_avail - 2) {
1794                 txr->no_desc_avail++;
1795                 error = ENOBUFS;
1796                 goto xmit_fail;
1797         }
1798         m_head = *m_headp;
1799
1800         /*
1801         ** Set up the appropriate offload context
1802         ** this becomes the first descriptor of 
1803         ** a packet.
1804         */
1805         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1806                 if (ixgbe_tso_setup(txr, m_head, &paylen, &olinfo_status)) {
1807                         cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1808                         olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1809                         olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1810                         ++adapter->tso_tx;
1811                 } else
1812                         return (ENXIO);
1813         } else if (ixgbe_tx_ctx_setup(txr, m_head))
1814                 olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1815
1816 #ifdef IXGBE_IEEE1588
1817         /* This is changing soon to an mtag detection */
1818         if (we detect this mbuf has a TSTAMP mtag)
1819                 cmd_type_len |= IXGBE_ADVTXD_MAC_TSTAMP;
1820 #endif
1821
1822 #ifdef IXGBE_FDIR
1823         /* Do the flow director magic */
1824         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1825                 ++txr->atr_count;
1826                 if (txr->atr_count >= atr_sample_rate) {
1827                         ixgbe_atr(txr, m_head);
1828                         txr->atr_count = 0;
1829                 }
1830         }
1831 #endif
1832         /* Record payload length */
1833         if (paylen == 0)
1834                 olinfo_status |= m_head->m_pkthdr.len <<
1835                     IXGBE_ADVTXD_PAYLEN_SHIFT;
1836
1837         i = txr->next_avail_desc;
1838         for (j = 0; j < nsegs; j++) {
1839                 bus_size_t seglen;
1840                 bus_addr_t segaddr;
1841
1842                 txbuf = &txr->tx_buffers[i];
1843                 txd = &txr->tx_base[i];
1844                 seglen = segs[j].ds_len;
1845                 segaddr = htole64(segs[j].ds_addr);
1846
1847                 txd->read.buffer_addr = segaddr;
1848                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1849                     cmd_type_len |seglen);
1850                 txd->read.olinfo_status = htole32(olinfo_status);
1851                 last = i; /* descriptor that will get completion IRQ */
1852
1853                 if (++i == adapter->num_tx_desc)
1854                         i = 0;
1855
1856                 txbuf->m_head = NULL;
1857                 txbuf->eop_index = -1;
1858         }
1859
1860         txd->read.cmd_type_len |=
1861             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1862         txr->tx_avail -= nsegs;
1863         txr->next_avail_desc = i;
1864
1865         txbuf->m_head = m_head;
1866         /* Swap the dma map between the first and last descriptor */
1867         txr->tx_buffers[first].map = txbuf->map;
1868         txbuf->map = map;
1869         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1870
1871         /* Set the index of the descriptor that will be marked done */
1872         txbuf = &txr->tx_buffers[first];
1873         txbuf->eop_index = last;
1874
1875         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1876             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1877         /*
1878          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1879          * hardware that this frame is available to transmit.
1880          */
1881         ++txr->total_packets;
1882         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1883
1884         return (0);
1885
1886 xmit_fail:
1887         bus_dmamap_unload(txr->txtag, txbuf->map);
1888         return (error);
1889
1890 }
1891
1892 static void
1893 ixgbe_set_promisc(struct adapter *adapter)
1894 {
1895         u_int32_t       reg_rctl;
1896         struct ifnet   *ifp = adapter->ifp;
1897
1898         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1899         reg_rctl &= (~IXGBE_FCTRL_UPE);
1900         reg_rctl &= (~IXGBE_FCTRL_MPE);
1901         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1902
1903         if (ifp->if_flags & IFF_PROMISC) {
1904                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1905                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1906         } else if (ifp->if_flags & IFF_ALLMULTI) {
1907                 reg_rctl |= IXGBE_FCTRL_MPE;
1908                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1909                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1910         }
1911         return;
1912 }
1913
1914
1915 /*********************************************************************
1916  *  Multicast Update
1917  *
1918  *  This routine is called whenever multicast address list is updated.
1919  *
1920  **********************************************************************/
1921 #define IXGBE_RAR_ENTRIES 16
1922
1923 static void
1924 ixgbe_set_multi(struct adapter *adapter)
1925 {
1926         u32     fctrl;
1927         u8      *mta;
1928         u8      *update_ptr;
1929         struct  ifmultiaddr *ifma;
1930         int     mcnt = 0;
1931         struct ifnet   *ifp = adapter->ifp;
1932
1933         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1934
1935         mta = adapter->mta;
1936         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1937             MAX_NUM_MULTICAST_ADDRESSES);
1938
1939         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1940         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1941         if (ifp->if_flags & IFF_PROMISC)
1942                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1943         else if (ifp->if_flags & IFF_ALLMULTI) {
1944                 fctrl |= IXGBE_FCTRL_MPE;
1945                 fctrl &= ~IXGBE_FCTRL_UPE;
1946         } else
1947                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1948         
1949         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1950
1951         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1952                 if (ifma->ifma_addr->sa_family != AF_LINK)
1953                         continue;
1954                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1955                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1956                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1957                 mcnt++;
1958         }
1959
1960         update_ptr = mta;
1961         ixgbe_update_mc_addr_list(&adapter->hw,
1962             update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1963
1964         return;
1965 }
1966
1967 /*
1968  * This is an iterator function now needed by the multicast
1969  * shared code. It simply feeds the shared code routine the
1970  * addresses in the array of ixgbe_set_multi() one by one.
1971  */
1972 static u8 *
1973 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
1974 {
1975         u8 *addr = *update_ptr;
1976         u8 *newptr;
1977         *vmdq = 0;
1978
1979         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1980         *update_ptr = newptr;
1981         return addr;
1982 }
1983
1984
/*********************************************************************
 *  Timer routine
 *
 *  This routine checks for link status, updates statistics,
 *  and runs the watchdog check.  Runs once per second under the
 *  core lock and re-arms itself via callout_reset().
 *
 **********************************************************************/

static void
ixgbe_local_timer(void *arg)
{
	struct adapter	*adapter = arg;
	device_t	dev = adapter->dev;
	struct ifnet	*ifp = adapter->ifp;
	struct ix_queue *que = adapter->queues;
	struct tx_ring	*txr = adapter->tx_rings;
	int		hung, busy, paused;

	IXGBE_CORE_LOCK(adapter);
	hung = busy = paused = 0;

	/* Check for pluggable optics */
	if (adapter->sfp_probe)
		if (!ixgbe_sfp_probe(adapter))
			goto out; /* Nothing to do */

	ixgbe_update_link_status(adapter);
	ixgbe_update_stats_counters(adapter);

	/*
	 * If the interface has been paused
	 * then don't do the watchdog check
	 */
	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
		paused = 1;

	/*
	** Check the TX queues status
	**	- central locked handling of OACTIVE
	**	- watchdog only if all queues show hung
	*/
	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
		if ((txr->queue_status & IXGBE_QUEUE_HUNG) &&
		    (paused == 0))
			++hung;
		if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
			++busy;
		/* Kick deferred processing for any non-idle queue */
		if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
			taskqueue_enqueue(que->tq, &que->que_task);
	}
	/* Only truly watchdog if all queues show hung */
	if (hung == adapter->num_queues)
		goto watchdog;
	/* Only turn off the stack flow when ALL are depleted */
	if (busy == adapter->num_queues)
		ifp->if_flags |= IFF_OACTIVE;
	else if ((ifp->if_flags & IFF_OACTIVE) &&
	    (busy < adapter->num_queues))
		ifp->if_flags &= ~IFF_OACTIVE;

out:
	ixgbe_rearm_queues(adapter, adapter->que_mask);
	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
	IXGBE_CORE_UNLOCK(adapter);
	return;

watchdog:
	/*
	 * All queues hung and not flow-controlled: dump diagnostics
	 * and reinitialize the interface.
	 * NOTE(review): the loop above has advanced txr one past the
	 * last ring by the time we reach this label, so the txr->me /
	 * tx_avail / next_to_clean reads below are out of bounds —
	 * they should reference the hung ring; verify and fix.
	 */
	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
	device_printf(dev,"TX(%d) desc avail = %d,"
	    "Next TX to Clean = %d\n",
	    txr->me, txr->tx_avail, txr->next_to_clean);
	adapter->ifp->if_flags &= ~IFF_RUNNING;
	adapter->watchdog_events++;
	ixgbe_init_locked(adapter);

	IXGBE_CORE_UNLOCK(adapter);
}
2065
2066 /*
2067 ** Note: this routine updates the OS on the link state
2068 **      the real check of the hardware only happens with
2069 **      a link interrupt.
2070 */
2071 static void
2072 ixgbe_update_link_status(struct adapter *adapter)
2073 {
2074         struct ifnet    *ifp = adapter->ifp;
2075         struct tx_ring *txr = adapter->tx_rings;
2076         device_t dev = adapter->dev;
2077
2078
2079         if (adapter->link_up){ 
2080                 if (adapter->link_active == FALSE) {
2081                         if (bootverbose)
2082                                 device_printf(dev,"Link is up %d Gbps %s \n",
2083                                     ((adapter->link_speed == 128)? 10:1),
2084                                     "Full Duplex");
2085                         adapter->link_active = TRUE;
2086                         /* Update any Flow Control changes */
2087                         ixgbe_fc_enable(&adapter->hw);
2088                         ifp->if_link_state = LINK_STATE_UP;
2089                         if_link_state_change(ifp);
2090                 }
2091         } else { /* Link down */
2092                 if (adapter->link_active == TRUE) {
2093                         if (bootverbose)
2094                                 device_printf(dev,"Link is Down\n");
2095                         ifp->if_link_state = LINK_STATE_DOWN;
2096                         if_link_state_change(ifp);
2097                         adapter->link_active = FALSE;
2098                         for (int i = 0; i < adapter->num_queues;
2099                             i++, txr++)
2100                                 txr->queue_status = IXGBE_QUEUE_IDLE;
2101                 }
2102         }
2103
2104         return;
2105 }
2106
2107
/*********************************************************************
 *
 *  This routine disables all traffic on the adapter by issuing a
 *  global reset on the MAC and deallocates TX/RX buffers.
 *
 *  Must be called with the core lock held (asserted below).
 *
 **********************************************************************/

static void
ixgbe_stop(void *arg)
{
	struct ifnet   *ifp;
	struct adapter *adapter = arg;
	struct ixgbe_hw *hw = &adapter->hw;
	ifp = adapter->ifp;

	/* Caller must hold the core lock */
	KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);

	INIT_DEBUGOUT("ixgbe_stop: begin\n");
	ixgbe_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Let the stack know...*/
	ifp->if_flags &= ~IFF_RUNNING;
	ifp->if_flags |= IFF_OACTIVE;

	ixgbe_reset_hw(hw);
	/*
	 * Clear adapter_stopped so the following ixgbe_stop_adapter()
	 * actually runs its quiesce path after the reset —
	 * NOTE(review): presumed intent; confirm against shared code.
	 */
	hw->adapter_stopped = FALSE;
	ixgbe_stop_adapter(hw);
	/* Turn off the laser */
	if (hw->phy.multispeed_fiber)
		ixgbe_disable_tx_laser(hw);

	/* reprogram the RAR[0] in case user changed it. */
	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

	return;
}
2145
2146
2147 /*********************************************************************
2148  *
2149  *  Determine hardware revision.
2150  *
2151  **********************************************************************/
2152 static void
2153 ixgbe_identify_hardware(struct adapter *adapter)
2154 {
2155         device_t        dev = adapter->dev;
2156         struct ixgbe_hw *hw = &adapter->hw;
2157
2158         /* Save off the information about this board */
2159         hw->vendor_id = pci_get_vendor(dev);
2160         hw->device_id = pci_get_device(dev);
2161         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2162         hw->subsystem_vendor_id =
2163             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2164         hw->subsystem_device_id =
2165             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2166
2167         /* We need this here to set the num_segs below */
2168         ixgbe_set_mac_type(hw);
2169
2170         /* Pick up the 82599 and VF settings */
2171         if (hw->mac.type != ixgbe_mac_82598EB) {
2172                 hw->phy.smart_speed = ixgbe_smart_speed;
2173                 adapter->num_segs = IXGBE_82599_SCATTER;
2174         } else
2175                 adapter->num_segs = IXGBE_82598_SCATTER;
2176
2177         return;
2178 }
2179
2180 /*********************************************************************
2181  *
2182  *  Determine optic type
2183  *
2184  **********************************************************************/
2185 static void
2186 ixgbe_setup_optics(struct adapter *adapter)
2187 {
2188         struct ixgbe_hw *hw = &adapter->hw;
2189         int             layer;
2190         
2191         layer = ixgbe_get_supported_physical_layer(hw);
2192
2193         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2194                 adapter->optics = IFM_10G_T;
2195                 return;
2196         }
2197
2198         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2199                 adapter->optics = IFM_1000_T;
2200                 return;
2201         }
2202
2203         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2204             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2205                 adapter->optics = IFM_10G_LR;
2206                 return;
2207         }
2208
2209         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2210                 adapter->optics = IFM_10G_SR;
2211                 return;
2212         }
2213
2214         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2215                 adapter->optics = IFM_10G_TWINAX;
2216                 return;
2217         }
2218
2219         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2220             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2221                 adapter->optics = IFM_10G_CX4;
2222                 return;
2223         }
2224
2225         /* If we get here just set the default */
2226         adapter->optics = IFM_ETHER | IFM_AUTO;
2227         return;
2228 }
2229
/*********************************************************************
 *
 *  Setup the Legacy or MSI Interrupt handler
 *
 *  Allocates a single IRQ (MSI if available, otherwise legacy INTx),
 *  creates the per-queue and link taskqueues, and installs the fast
 *  interrupt handler.  Returns 0 on success or an errno.
 *
 **********************************************************************/
static int
ixgbe_allocate_legacy(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct		ix_queue *que = adapter->queues;
	int error, rid = 0;
	unsigned int intr_flags;

	/* MSI RID at 1 */
	if (adapter->msix == 1)
		rid = 1;

	/* Try allocating a MSI interrupt first */
	adapter->intr_type = pci_alloc_1intr(dev, ixgbe_msi_enable,
		&rid, &intr_flags);

	/* We allocate a single interrupt resource */
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, intr_flags);
	if (adapter->res == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	/*
	 * Try allocating a fast interrupt and the associated deferred
	 * processing contexts.
	 */
	TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
	que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
	    taskqueue_thread_enqueue, &que->tq);
	taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s ixq",
	    device_get_nameunit(adapter->dev));

	/* Tasklets for Link, SFP and Multispeed Fiber */
	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
#ifdef IXGBE_FDIR
	TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
#endif
	adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
	    device_get_nameunit(adapter->dev));

	if ((error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
	    ixgbe_legacy_irq, que, &adapter->tag, &adapter->serializer)) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
		    "handler: %d\n", error);
		/*
		 * NOTE(review): adapter->res (and the MSI allocation)
		 * is not released on this path — presumably cleaned up
		 * by detach; verify against the caller's error path.
		 */
		taskqueue_free(que->tq);
		taskqueue_free(adapter->tq);
		que->tq = NULL;
		adapter->tq = NULL;
		return (error);
	}
	/* For simplicity in the handlers */
	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;

	return (0);
}
2297
2298
/*********************************************************************
 *
 *  Setup MSIX Interrupt resources and handlers
 *
 *  Allocates one vector per RX/TX queue pair (each bound to a CPU,
 *  with a per-queue taskqueue) plus one vector for link/other
 *  causes.  Returns 0 on success or an errno.
 *
 *  NOTE(review): resources allocated in earlier iterations are not
 *  released on the error returns below — presumably the detach path
 *  handles that; verify against the caller.
 *
 **********************************************************************/
static int
ixgbe_allocate_msix(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct		ix_queue *que = adapter->queues;
	int		error, rid, vector = 0;
	char		desc[16];

	error = pci_setup_msix(dev);
	if (error) {
		device_printf(dev, "MSI-X setup failed\n");
		return (error);
	}

	/* One vector and IRQ resource per queue, bound to cpu i */
	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
		rid = vector + 1;

		/*
		** Bind the msix vector, and thus the
		** ring to the corresponding cpu.
		*/
		error = pci_alloc_msix_vector(dev, vector, &rid, i);
		if (error) {
			device_printf(dev, "pci_alloc_msix_vector failed\n");
			return (error);
		}

		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
		    RF_SHAREABLE | RF_ACTIVE);
		if (que->res == NULL) {
			device_printf(dev,"Unable to allocate"
			    " bus resource: que interrupt [%d]\n", vector);
			return (ENXIO);
		}
		/* Set the handler function */
		ksnprintf(desc, sizeof(desc), "%s que %d",
		    device_get_nameunit(dev), i);
		error = bus_setup_intr_descr(dev, que->res, INTR_MPSAFE,
		    ixgbe_msix_que, que, &que->tag, &que->serializer, desc);
		if (error) {
			que->res = NULL;
			device_printf(dev, "Failed to register QUE handler");
			return (error);
		}
		que->msix = vector;
		/*
		 * NOTE(review): (1 << que->msix) is evaluated in int
		 * width before the cast; vectors >= 31 would need
		 * ((u64)1 << que->msix).  Harmless at current queue
		 * counts, but worth confirming.
		 */
		adapter->que_mask |= (u64)(1 << que->msix);

		TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
		que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
		    taskqueue_thread_enqueue, &que->tq);
		taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s que",
		    device_get_nameunit(adapter->dev));
	}

	/* and Link, bind vector to cpu #0 */
	rid = vector + 1;
	error = pci_alloc_msix_vector(dev, vector, &rid, 0);
	if (error) {
		device_printf(dev, "pci_alloc_msix_vector failed\n");
		return (error);
	}
	adapter->res = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (!adapter->res) {
		device_printf(dev,"Unable to allocate"
	    " bus resource: Link interrupt [%d]\n", rid);
		return (ENXIO);
	}
	/* Set the link handler function */
	error = bus_setup_intr_descr(dev, adapter->res, INTR_MPSAFE,
	    ixgbe_msix_link, adapter, &adapter->tag, &adapter->serializer,
	    "link");
	if (error) {
		adapter->res = NULL;
		device_printf(dev, "Failed to register LINK handler");
		return (error);
	}
	pci_enable_msix(dev);

	adapter->linkvec = vector;
	/* Tasklets for Link, SFP and Multispeed Fiber */
	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
#ifdef IXGBE_FDIR
	TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
#endif
	adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
	    device_get_nameunit(adapter->dev));

	return (0);
}
2398
2399 /*
2400  * Setup Either MSI/X or MSI
2401  */
2402 static int
2403 ixgbe_setup_msix(struct adapter *adapter)
2404 {
2405         device_t dev = adapter->dev;
2406         int rid, want, queues, msgs;
2407
2408         /* Override by tuneable */
2409         if (ixgbe_enable_msix == 0)
2410                 goto msi;
2411
2412         /* First try MSI/X */
2413         rid = PCIR_BAR(MSIX_82598_BAR);
2414         adapter->msix_mem = bus_alloc_resource_any(dev,
2415             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2416         if (!adapter->msix_mem) {
2417                 rid += 4;       /* 82599 maps in higher BAR */
2418                 adapter->msix_mem = bus_alloc_resource_any(dev,
2419                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2420         }
2421         if (!adapter->msix_mem) {
2422                 /* May not be enabled */
2423                 device_printf(adapter->dev,
2424                     "Unable to map MSIX table \n");
2425                 goto msi;
2426         }
2427
2428         msgs = pci_msix_count(dev); 
2429         if (msgs == 0) { /* system has msix disabled */
2430                 bus_release_resource(dev, SYS_RES_MEMORY,
2431                     rid, adapter->msix_mem);
2432                 adapter->msix_mem = NULL;
2433                 goto msi;
2434         }
2435
2436         /* Figure out a reasonable auto config value */
2437         queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2438
2439         if (ixgbe_num_queues != 0)
2440                 queues = ixgbe_num_queues;
2441         /* Set max queues to 8 when autoconfiguring */
2442         else if ((ixgbe_num_queues == 0) && (queues > 8))
2443                 queues = 8;
2444
2445         /*
2446         ** Want one vector (RX/TX pair) per queue
2447         ** plus an additional for Link.
2448         */
2449         want = queues + 1;
2450         if (msgs >= want)
2451                 msgs = want;
2452         else {
2453                 device_printf(adapter->dev,
2454                     "MSIX Configuration Problem, "
2455                     "%d vectors but %d queues wanted!\n",
2456                     msgs, want);
2457                 return (0); /* Will go to Legacy setup */
2458         }
2459         if (msgs) {
2460                 device_printf(adapter->dev,
2461                     "Using MSIX interrupts with %d vectors\n", msgs);
2462                 adapter->num_queues = queues;
2463                 return (msgs);
2464         }
2465 msi:
2466         msgs = pci_msi_count(dev);
2467         return (msgs);
2468 }
2469
2470
2471 static int
2472 ixgbe_allocate_pci_resources(struct adapter *adapter)
2473 {
2474         int             rid;
2475         device_t        dev = adapter->dev;
2476
2477         rid = PCIR_BAR(0);
2478         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2479             &rid, RF_ACTIVE);
2480
2481         if (!(adapter->pci_mem)) {
2482                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2483                 return (ENXIO);
2484         }
2485
2486         adapter->osdep.mem_bus_space_tag =
2487                 rman_get_bustag(adapter->pci_mem);
2488         adapter->osdep.mem_bus_space_handle =
2489                 rman_get_bushandle(adapter->pci_mem);
2490         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2491
2492         /* Legacy defaults */
2493         adapter->num_queues = 1;
2494         adapter->hw.back = &adapter->osdep;
2495
2496         /*
2497         ** Now setup MSI or MSI/X, should
2498         ** return us the number of supported
2499         ** vectors. (Will be 1 for MSI)
2500         */
2501         adapter->msix = ixgbe_setup_msix(adapter);
2502         return (0);
2503 }
2504
2505 static void
2506 ixgbe_free_pci_resources(struct adapter * adapter)
2507 {
2508         struct          ix_queue *que = adapter->queues;
2509         device_t        dev = adapter->dev;
2510         int             rid, memrid;
2511
2512         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2513                 memrid = PCIR_BAR(MSIX_82598_BAR);
2514         else
2515                 memrid = PCIR_BAR(MSIX_82599_BAR);
2516
2517         /*
2518         ** There is a slight possibility of a failure mode
2519         ** in attach that will result in entering this function
2520         ** before interrupt resources have been initialized, and
2521         ** in that case we do not want to execute the loops below
2522         ** We can detect this reliably by the state of the adapter
2523         ** res pointer.
2524         */
2525         if (adapter->res == NULL)
2526                 goto mem;
2527
2528         /*
2529         **  Release all msix queue resources:
2530         */
2531         for (int i = 0; i < adapter->num_queues; i++, que++) {
2532                 rid = que->msix + 1;
2533                 if (que->tag != NULL) {
2534                         bus_teardown_intr(dev, que->res, que->tag);
2535                         que->tag = NULL;
2536                 }
2537                 if (que->res != NULL)
2538                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2539         }
2540
2541
2542         /* Clean the Legacy or Link interrupt last */
2543         if (adapter->linkvec) /* we are doing MSIX */
2544                 rid = adapter->linkvec + 1;
2545         else
2546                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2547
2548         if (adapter->tag != NULL) {
2549                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2550                 adapter->tag = NULL;
2551         }
2552         if (adapter->res != NULL)
2553                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2554         if (adapter->intr_type == PCI_INTR_TYPE_MSI)
2555                 pci_release_msi(adapter->dev);
2556
2557 mem:
2558         if (adapter->msix)
2559                 pci_release_msi(dev);
2560
2561         if (adapter->msix_mem != NULL)
2562                 bus_release_resource(dev, SYS_RES_MEMORY,
2563                     memrid, adapter->msix_mem);
2564
2565         if (adapter->pci_mem != NULL)
2566                 bus_release_resource(dev, SYS_RES_MEMORY,
2567                     PCIR_BAR(0), adapter->pci_mem);
2568
2569         return;
2570 }
2571
2572 /*********************************************************************
2573  *
2574  *  Setup networking device structure and register an interface.
2575  *
2576  **********************************************************************/
static int
ixgbe_setup_interface(device_t dev, struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	struct ifnet   *ifp;

	INIT_DEBUGOUT("ixgbe_setup_interface: begin");

	/* Allocate the ifnet and wire up the driver entry points */
	ifp = adapter->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not allocate ifnet structure\n");
		return (-1);
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_baudrate = 1000000000;
	ifp->if_init = ixgbe_init;
	ifp->if_softc = adapter;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = ixgbe_ioctl;
	ifp->if_start = ixgbe_start;
#if 0 /* __FreeBSD_version >= 800000 */
	ifp->if_transmit = ixgbe_mq_start;
	ifp->if_qflush = ixgbe_qflush;
#endif
	/* Leave a little slack below the descriptor count */
	ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2;

	ether_ifattach(ifp, adapter->hw.mac.addr, NULL);

	/* Derived from the default MTU set up by ether_ifattach */
	adapter->max_frame_size =
	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

	/* Checksum offload, TSO, jumbo frames and VLAN capabilities */
	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
#if 0 /* NET_TSO */
			     |  IFCAP_VLAN_HWTSO
#endif
			     |  IFCAP_VLAN_MTU;
	ifp->if_capenable = ifp->if_capabilities;

	/* Don't enable LRO by default */
#if 0 /* NET_LRO */
	ifp->if_capabilities |= IFCAP_LRO;
#endif

	/*
	** Don't turn this on by default, if vlans are
	** created on another pseudo device (eg. lagg)
	** then vlan events are not passed thru, breaking
	** operation, but with HW FILTER off it works. If
	** using vlans directly on the ixgbe driver you can
	** enable this and get full hardware tag filtering.
	*/
	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
		     ixgbe_media_status);
	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
	/* 82598AT has a copper PHY: also advertise 1000baseT */
	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
		ifmedia_add(&adapter->media,
		    IFM_ETHER | IFM_1000_T, 0, NULL);
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);

	return (0);
}
2656
2657 static void
2658 ixgbe_config_link(struct adapter *adapter)
2659 {
2660         struct ixgbe_hw *hw = &adapter->hw;
2661         u32     autoneg, err = 0;
2662         bool    sfp, negotiate;
2663
2664         sfp = ixgbe_is_sfp(hw);
2665
2666         if (sfp) { 
2667                 if (hw->phy.multispeed_fiber) {
2668                         hw->mac.ops.setup_sfp(hw);
2669                         ixgbe_enable_tx_laser(hw);
2670                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2671                 } else
2672                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2673         } else {
2674                 if (hw->mac.ops.check_link)
2675                         err = ixgbe_check_link(hw, &autoneg,
2676                             &adapter->link_up, FALSE);
2677                 if (err)
2678                         goto out;
2679                 autoneg = hw->phy.autoneg_advertised;
2680                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2681                         err  = hw->mac.ops.get_link_capabilities(hw,
2682                             &autoneg, &negotiate);
2683                 if (err)
2684                         goto out;
2685                 if (hw->mac.ops.setup_link)
2686                         err = hw->mac.ops.setup_link(hw, autoneg,
2687                             negotiate, adapter->link_up);
2688         }
2689 out:
2690         return;
2691 }
2692
2693 /********************************************************************
2694  * Manage DMA'able memory.
2695  *******************************************************************/
2696 static void
2697 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2698 {
2699         if (error)
2700                 return;
2701         *(bus_addr_t *) arg = segs->ds_addr;
2702         return;
2703 }
2704
/*
 * Allocate 'size' bytes of DMA-able memory for a descriptor ring:
 * create a tag, allocate the memory, and load the map so that
 * dma->dma_paddr holds the bus address.  Returns 0 on success or a
 * bus_dma error code; on failure the tag/map fields are zeroed so
 * ixgbe_dma_free() is never called on a partial allocation.
 */
static int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
		struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int		r;

	r = bus_dma_tag_create(NULL,	/* parent */
			       DBA_ALIGN, 0,	/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,	/* filter, filterarg */
			       size,	/* maxsize */
			       1,	/* nsegments */
			       size,	/* maxsegsize */
			       BUS_DMA_ALLOCNOW,	/* flags */
			       &dma->dma_tag);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
		       "error %u\n", r);
		goto fail_0;
	}
	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
			     BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
		       "error %u\n", r);
		goto fail_1;
	}
	/* ixgbe_dmamap_cb() records the bus address in dma_paddr */
	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
			    size,
			    ixgbe_dmamap_cb,
			    &dma->dma_paddr,
			    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
		       "error %u\n", r);
		goto fail_2;
	}
	dma->dma_size = size;
	return (0);
	/* Unwind in reverse order of acquisition */
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_map = NULL;
	dma->dma_tag = NULL;
	return (r);
}
2755
/*
 * Release a DMA area allocated by ixgbe_dma_malloc():
 * sync, unload the map, free the memory, destroy the tag.
 */
static void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}
2765
2766
2767 /*********************************************************************
2768  *
2769  *  Allocate memory for the transmit and receive rings, and then
2770  *  the descriptors associated with each, called only once at attach.
2771  *
2772  **********************************************************************/
static int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int rsize, tsize, error = IXGBE_SUCCESS;
	/* txconf/rxconf count rings whose DMA memory was allocated,
	 * so the error paths free exactly what was set up */
	int txconf = 0, rxconf = 0;

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) kmalloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* First allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Next allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself: descriptor area size, DBA_ALIGN aligned */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */ 
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->me = i;

		/* Initialize the TX side lock */
		ksnprintf(txr->lock_name, sizeof(txr->lock_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		lockinit(&txr->tx_lock, txr->lock_name, 0, LK_CANRECURSE);

		if (ixgbe_dma_malloc(adapter, tsize,
			&txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#if 0 /* __FreeBSD_version >= 800000 */
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
	}

	/*
	 * Next the RX queues...
	 */ 
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
		rxr->me = i;

		/* Initialize the RX side lock */
		ksnprintf(rxr->lock_name, sizeof(rxr->lock_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		lockinit(&rxr->rx_lock, rxr->lock_name, 0, LK_CANRECURSE);

		if (ixgbe_dma_malloc(adapter, rsize,
			&rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RxDescriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring*/
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

	/* Unwind: free only the rings that were fully set up */
err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	kfree(adapter->rx_rings, M_DEVBUF);
rx_fail:
	kfree(adapter->tx_rings, M_DEVBUF);
tx_fail:
	kfree(adapter->queues, M_DEVBUF);
fail:
	return (error);
}
2921
2922 /*********************************************************************
2923  *
2924  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2925  *  the information needed to transmit a packet on the wire. This is
2926  *  called only once at attach, setup is done every reset.
2927  *
2928  **********************************************************************/
static int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_tx_buf *txbuf;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(
			       NULL,	/* parent */
			       1, 0,		/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXGBE_TSO_SIZE,		/* maxsize */
			       adapter->num_segs,	/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       &txr->txtag))) {
		device_printf(dev,"Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* One ixgbe_tx_buf per TX descriptor, zero-initialized */
	if (!(txr->tx_buffers =
	    (struct ixgbe_tx_buf *) kmalloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return 0;
fail:
	/* We free all, it handles case where we are in the middle */
	ixgbe_free_transmit_structures(adapter);
	return (error);
}
2979
2980 /*********************************************************************
2981  *
2982  *  Initialize a transmit ring.
2983  *
2984  **********************************************************************/
/*
 * Reset one TX ring to its initial state under the TX lock:
 * clear the descriptor area, reset the indices, free any mbufs
 * still attached to the buffers, and mark all descriptors free.
 */
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
	int i;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	IXGBE_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */
	bzero((void *)txr->tx_base,
	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		txbuf->eop_index = -1;
	}

#ifdef IXGBE_FDIR
	/* Set the rate at which we sample packets */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
		txr->atr_sample = atr_sample_rate;
#endif

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	/* Push the cleared descriptor area out to the device */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
}
3054
3055 /*********************************************************************
3056  *
3057  *  Initialize all transmit rings.
3058  *
3059  **********************************************************************/
3060 static int
3061 ixgbe_setup_transmit_structures(struct adapter *adapter)
3062 {
3063         struct tx_ring *txr = adapter->tx_rings;
3064
3065         for (int i = 0; i < adapter->num_queues; i++, txr++)
3066                 ixgbe_setup_transmit_ring(txr);
3067
3068         return (0);
3069 }
3070
3071 /*********************************************************************
3072  *
3073  *  Enable transmit unit.
3074  *
3075  **********************************************************************/
3076 static void
3077 ixgbe_initialize_transmit_units(struct adapter *adapter)
3078 {
3079         struct tx_ring  *txr = adapter->tx_rings;
3080         struct ixgbe_hw *hw = &adapter->hw;
3081
3082         /* Setup the Base and Length of the Tx Descriptor Ring */
3083
3084         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3085                 u64     tdba = txr->txdma.dma_paddr;
3086                 u32     txctrl;
3087
3088                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3089                        (tdba & 0x00000000ffffffffULL));
3090                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3091                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3092                     adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc));
3093
3094                 /* Setup the HW Tx Head and Tail descriptor pointers */
3095                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3096                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3097
3098                 /* Setup Transmit Descriptor Cmd Settings */
3099                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3100                 txr->queue_status = IXGBE_QUEUE_IDLE;
3101
3102                 /* Disable Head Writeback */
3103                 switch (hw->mac.type) {
3104                 case ixgbe_mac_82598EB:
3105                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3106                         break;
3107                 case ixgbe_mac_82599EB:
3108                 case ixgbe_mac_X540:
3109                 default:
3110                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3111                         break;
3112                 }
3113                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3114                 switch (hw->mac.type) {
3115                 case ixgbe_mac_82598EB:
3116                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3117                         break;
3118                 case ixgbe_mac_82599EB:
3119                 case ixgbe_mac_X540:
3120                 default:
3121                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3122                         break;
3123                 }
3124
3125         }
3126
3127         if (hw->mac.type != ixgbe_mac_82598EB) {
3128                 u32 dmatxctl, rttdcs;
3129                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3130                 dmatxctl |= IXGBE_DMATXCTL_TE;
3131                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3132                 /* Disable arbiter to set MTQC */
3133                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3134                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3135                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3136                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3137                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3138                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3139         }
3140
3141         return;
3142 }
3143
3144 /*********************************************************************
3145  *
3146  *  Free all transmit rings.
3147  *
3148  **********************************************************************/
3149 static void
3150 ixgbe_free_transmit_structures(struct adapter *adapter)
3151 {
3152         struct tx_ring *txr = adapter->tx_rings;
3153
3154         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3155                 IXGBE_TX_LOCK(txr);
3156                 ixgbe_free_transmit_buffers(txr);
3157                 ixgbe_dma_free(adapter, &txr->txdma);
3158                 IXGBE_TX_UNLOCK(txr);
3159                 IXGBE_TX_LOCK_DESTROY(txr);
3160         }
3161         kfree(adapter->tx_rings, M_DEVBUF);
3162 }
3163
3164 /*********************************************************************
3165  *
3166  *  Free transmit ring related data structures.
3167  *
3168  **********************************************************************/
/*
 * Free one ring's transmit buffers: any attached mbufs and their
 * DMA maps, then the buffer array and the TX DMA tag.  Tolerates
 * being called on a partially-initialized ring (NULL checks).
 */
static void
ixgbe_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *tx_buffer;
	int		i;

	INIT_DEBUGOUT("free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			/* Buffer still holds a packet: sync, unload, free */
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		} else if (tx_buffer->map != NULL) {
			/* No packet, but a map may still exist */
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			bus_dmamap_destroy(txr->txtag,
			    tx_buffer->map);
			tx_buffer->map = NULL;
		}
	}
#if 0 /* __FreeBSD_version >= 800000 */
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
	if (txr->tx_buffers != NULL) {
		kfree(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}
3217
3218 /*********************************************************************
3219  *
3220  *  Advanced Context Descriptor setup for VLAN or CSUM
3221  *
3222  **********************************************************************/
3223
static bool
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ixgbe_tx_buf        *tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int  ehdrlen, ip_hlen = 0;
	u16	etype;
	u8	ipproto = 0;
	bool	offload = TRUE;	/* cleared if we can't offload this frame */
	int ctxd = txr->next_avail_desc;
	u16 vtag = 0;


	/* No checksum offload requested for this packet? */
	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	/* Claim the next descriptor slot as an advanced context descriptor */
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must 
	** be placed into the descriptor itself.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	} else if (offload == FALSE)
		/* Neither VLAN nor csum work: no context descriptor needed */
		return FALSE;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	/* Extract the L3 header length and protocol for the descriptor */
	switch (etype) {
		case ETHERTYPE_IP:
			ip = (struct ip *)(mp->m_data + ehdrlen);
			ip_hlen = ip->ip_hl << 2;
			ipproto = ip->ip_p;
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
			break;
		case ETHERTYPE_IPV6:
			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
			ip_hlen = sizeof(struct ip6_hdr);
			/* XXX-BZ this will go badly in case of ext hdrs. */
			ipproto = ip6->ip6_nxt;
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
			break;
		default:
			offload = FALSE;
			break;
	}

	vlan_macip_lens |= ip_hlen;
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* Select the L4 checksum type the hardware should compute */
	switch (ipproto) {
		case IPPROTO_TCP:
			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			break;

		case IPPROTO_UDP:
			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
			break;

#if 0
		case IPPROTO_SCTP:
			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
			break;
#endif
		default:
			offload = FALSE;
			break;
	}

	/* Now copy bits into descriptor */
	/* NOTE(review): |= relies on txeof() zeroing cleaned descriptors */
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* Context descriptors carry no mbuf and are not an end-of-packet */
	tx_buffer->m_head = NULL;
	tx_buffer->eop_index = -1;

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	/* TRUE tells the caller the data descriptors may request offload */
	return (offload);
}
3336
3337 /**********************************************************************
3338  *
3339  *  Setup work for hardware segmentation offload (TSO) on
3340  *  adapters using advanced tx descriptors
3341  *
3342  **********************************************************************/
static bool
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen,
    u32 *olinfo_status)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ixgbe_tx_buf        *tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u16 vtag = 0, eh_type;
	u32 mss_l4len_idx = 0, len;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
#if 0 /* IPv6 TSO */
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#endif
#ifdef INET
	struct ip *ip;
#endif
	struct tcphdr *th;


	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		eh_type = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		eh_type = eh->evl_encap_proto;
	}

	/* Ensure we have at least the IP+TCP header in the first mbuf. */
	len = ehdrlen + sizeof(struct tcphdr);
	switch (ntohs(eh_type)) {
#if 0 /* IPv6 TSO */
#ifdef INET6
	case ETHERTYPE_IPV6:
		if (mp->m_len < len + sizeof(struct ip6_hdr))
			return FALSE;
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return FALSE;
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
#endif
#ifdef INET
	case ETHERTYPE_IP:
		if (mp->m_len < len + sizeof(struct ip))
			return FALSE;
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return FALSE;
		/*
		 * HW computes the IP and TCP checksums per segment, but
		 * it needs the pseudo-header sum seeded in the TCP header
		 * and the IP checksum field cleared.  This mutates the
		 * packet headers in place.
		 */
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		break;
#endif
	default:
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(eh_type));
		break;
	}

	/* Claim the next descriptor slot as the TSO context descriptor */
	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	tcp_hlen = th->th_off << 2;

	/* This is used in the transmit desc in encap */
	*paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX: segment size and TCP header length for the HW */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);
	/* Context descriptors carry no mbuf and are not an end-of-packet */
	tx_buffer->m_head = NULL;
	tx_buffer->eop_index = -1;

	/* Consume the slot, wrapping at the end of the ring */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	return TRUE;
}
3461
#ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry 
** allowing traffic flows to be identified and kept
** on the same cpu.  This would be a performance
** hit, but we only do it at IXGBE_FDIR_RATE of
** packets.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter			*adapter = txr->adapter;
	struct ix_queue			*que;
	struct ip			*ip;
	struct tcphdr			*th;
	struct udphdr			*uh;
	struct ether_vlan_header	*eh;
	union ixgbe_atr_hash_dword	input = {.dword = 0}; 
	union ixgbe_atr_hash_dword	common = {.dword = 0}; 
	int				ehdrlen, ip_hlen;
	u16				etype;

	/* Skip over an 802.1Q header if present */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	/* Only handling IPv4 */
	if (etype != htons(ETHERTYPE_IP))
		return;

	ip = (struct ip *)(mp->m_data + ehdrlen);
	ip_hlen = ip->ip_hl << 2;

	/* check if we're UDP or TCP */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= th->th_sport;
		common.port.src ^= th->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
		break;
	case IPPROTO_UDP:
		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= uh->uh_sport;
		common.port.src ^= uh->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
		break;
	default:
		return;
	}

	/*
	 * Fold the VLAN tag and addresses into the signature.  Note:
	 * DragonFly's mbuf pkthdr field is ether_vlantag (the FreeBSD
	 * original used ether_vtag, which does not exist here).
	 */
	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vlantag);
	if (mp->m_pkthdr.ether_vlantag)
		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
	else
		common.flex_bytes ^= etype;
	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

	que = &adapter->queues[txr->me];
	/*
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU
	*/
	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
	    input, common, que->msix);
}
#endif /* IXGBE_FDIR */
3537
3538 /**********************************************************************
3539  *
3540  *  Examine each tx_buffer in the used queue. If the hardware is done
3541  *  processing the packet then free associated resources. The
3542  *  tx_buffer is put back on the free queue.
3543  *
3544  **********************************************************************/
static bool
ixgbe_txeof(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;
	u32	first, last, done, processed;
	struct ixgbe_tx_buf *tx_buffer;
	struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;

	/* Caller must hold the TX ring lock */
	KKASSERT(lockstatus(&txr->tx_lock, curthread) != 0);

#ifdef DEV_NETMAP
	if (ifp->if_capenable & IFCAP_NETMAP) {
		struct netmap_adapter *na = NA(ifp);
		struct netmap_kring *kring = &na->tx_rings[txr->me];

		tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;

		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_POSTREAD);
		/*
		 * In netmap mode, all the work is done in the context
		 * of the client thread. Interrupt handlers only wake up
		 * clients, which may be sleeping on individual rings
		 * or on a global resource for all rings.
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		 *
		 * When the driver has separate locks, we need to
		 * release and re-acquire txlock to avoid deadlocks.
		 * XXX see if we can find a better way.
		 */
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		     tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
			kring->nr_kflags = kring->nkr_num_slots;
			selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
			IXGBE_TX_UNLOCK(txr);
			IXGBE_CORE_LOCK(adapter);
			selwakeuppri(&na->tx_si, PI_NET);
			IXGBE_CORE_UNLOCK(adapter);
			IXGBE_TX_LOCK(txr);
		}
		return FALSE;
	}
#endif /* DEV_NETMAP */

	/* Ring fully free: nothing outstanding to clean */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = IXGBE_QUEUE_IDLE;
		return FALSE;
	}

	processed = 0;
	first = txr->next_to_clean;
	tx_buffer = &txr->tx_buffers[first];
	/* For cleanup we just use legacy struct */
	tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
	last = tx_buffer->eop_index;
	/* eop_index == -1 means no complete packet starts here */
	if (last == -1)
		return FALSE;
	eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];

	/*
	** Get the index of the first descriptor
	** BEYOND the EOP and call that 'done'.
	** I do this so the comparison in the
	** inner while loop below can be simple
	*/
	if (++last == adapter->num_tx_desc) last = 0;
	done = last;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);
	/*
	** Only the EOP descriptor of a packet now has the DD
	** bit set, this is what we look for...
	*/
	while (eop_desc->upper.fields.status & IXGBE_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			/* Zero the descriptor so a stale DD bit can't match */
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			++txr->tx_avail;
			++processed;

			if (tx_buffer->m_head) {
				txr->bytes +=
				    tx_buffer->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);
				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
				tx_buffer->map = NULL;
			}
			tx_buffer->eop_index = -1;
			/* Progress was made: reset the watchdog timestamp */
			txr->watchdog_time = ticks;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc =
			    (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
		}
		++txr->packets;
		++ifp->if_opackets;
		/* See if there is more work now */
		last = tx_buffer->eop_index;
		if (last != -1) {
			eop_desc =
			    (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
			/* Get next done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	** Watchdog calculation, we know there's
	** work outstanding or the first return
	** would have been taken, so none processed
	** for too long indicates a hang.
	*/
	if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
		txr->queue_status = IXGBE_QUEUE_HUNG;

	/* With a minimum free clear the depleted state bit.  */
	if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
		txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;

	/* Everything reclaimed: mark idle; FALSE = no more work pending */
	if (txr->tx_avail == adapter->num_tx_desc) {
		txr->queue_status = IXGBE_QUEUE_IDLE;
		return (FALSE);
	}

	/* TRUE = descriptors still outstanding, call again */
	return TRUE;
}
3697
3698 /*********************************************************************
3699  *
3700  *  Refresh mbuf buffers for RX descriptor rings
3701  *   - now keeps its own state so discards due to resource
3702  *     exhaustion are unnecessary, if an mbuf cannot be obtained
3703  *     it just returns, keeping its placeholder, thus it can simply
3704  *     be recalled to try again.
3705  *
3706  **********************************************************************/
3707 static void
3708 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3709 {
3710         struct adapter          *adapter = rxr->adapter;
3711         bus_dma_segment_t       hseg[1];
3712         bus_dma_segment_t       pseg[1];
3713         struct ixgbe_rx_buf     *rxbuf;
3714         struct mbuf             *mh, *mp;
3715         int                     i, j, nsegs, error;
3716         bool                    refreshed = FALSE;
3717
3718         i = j = rxr->next_to_refresh;
3719         /* Control the loop with one beyond */
3720         if (++j == adapter->num_rx_desc)
3721                 j = 0;
3722
3723         while (j != limit) {
3724                 rxbuf = &rxr->rx_buffers[i];
3725                 if (rxr->hdr_split == FALSE)
3726                         goto no_split;
3727
3728                 if (rxbuf->m_head == NULL) {
3729                         mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3730                         if (mh == NULL)
3731                                 goto update;
3732                 } else
3733                         mh = rxbuf->m_head;
3734
3735                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3736                 mh->m_len = MHLEN;
3737                 mh->m_flags |= M_PKTHDR;
3738                 /* Get the memory mapping */
3739                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
3740                     rxbuf->hmap, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3741                 if (error != 0) {
3742                         kprintf("Refresh mbufs: hdr dmamap load"
3743                             " failure - %d\n", error);
3744                         m_free(mh);
3745                         rxbuf->m_head = NULL;
3746                         goto update;
3747                 }
3748                 rxbuf->m_head = mh;
3749                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3750                     BUS_DMASYNC_PREREAD);
3751                 rxr->rx_base[i].read.hdr_addr =
3752                     htole64(hseg[0].ds_addr);
3753
3754 no_split:
3755                 if (rxbuf->m_pack == NULL) {
3756                         mp = m_getjcl(MB_DONTWAIT, MT_DATA,
3757                             M_PKTHDR, adapter->rx_mbuf_sz);
3758                         if (mp == NULL)
3759                                 goto update;
3760                 } else
3761                         mp = rxbuf->m_pack;
3762
3763                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3764                 /* Get the memory mapping */
3765                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
3766                     rxbuf->pmap, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3767                 if (error != 0) {
3768                         kprintf("Refresh mbufs: payload dmamap load"
3769                             " failure - %d\n", error);
3770                         m_free(mp);
3771                         rxbuf->m_pack = NULL;
3772                         goto update;
3773                 }
3774                 rxbuf->m_pack = mp;
3775                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3776                     BUS_DMASYNC_PREREAD);
3777                 rxr->rx_base[i].read.pkt_addr =
3778                     htole64(pseg[0].ds_addr);
3779
3780                 refreshed = TRUE;
3781                 /* Next is precalculated */
3782                 i = j;
3783                 rxr->next_to_refresh = i;
3784                 if (++j == adapter->num_rx_desc)
3785                         j = 0;
3786         }
3787 update:
3788         if (refreshed) /* Update hardware tail index */
3789                 IXGBE_WRITE_REG(&adapter->hw,
3790                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3791         return;
3792 }
3793
3794 /*********************************************************************
3795  *
3796  *  Allocate memory for rx_buffer structures. Since we use one
3797  *  rx_buffer per received packet, the maximum number of rx_buffer's
3798  *  that we'll need is equal to the number of receive descriptors
3799  *  that we've allocated.
3800  *
3801  **********************************************************************/
3802 static int
3803 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3804 {
3805         struct  adapter         *adapter = rxr->adapter;
3806         device_t                dev = adapter->dev;
3807         struct ixgbe_rx_buf     *rxbuf;
3808         int                     i, bsize, error;
3809
3810         bsize = sizeof(struct ixgbe_rx_buf) * adapter->num_rx_desc;
3811         if (!(rxr->rx_buffers =
3812             (struct ixgbe_rx_buf *) kmalloc(bsize,
3813             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3814                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3815                 error = ENOMEM;
3816                 goto fail;
3817         }
3818
3819         if ((error = bus_dma_tag_create(NULL,   /* parent */
3820                                    1, 0,        /* alignment, bounds */
3821                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3822                                    BUS_SPACE_MAXADDR,   /* highaddr */
3823                                    NULL, NULL,          /* filter, filterarg */
3824                                    MSIZE,               /* maxsize */
3825                                    1,                   /* nsegments */
3826                                    MSIZE,               /* maxsegsize */
3827                                    0,                   /* flags */
3828                                    &rxr->htag))) {
3829                 device_printf(dev, "Unable to create RX DMA tag\n");
3830                 goto fail;
3831         }
3832
3833         if ((error = bus_dma_tag_create(NULL,   /* parent */
3834                                    1, 0,        /* alignment, bounds */
3835                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3836                                    BUS_SPACE_MAXADDR,   /* highaddr */
3837                                    NULL, NULL,          /* filter, filterarg */
3838                                    MJUM16BYTES,         /* maxsize */
3839                                    1,                   /* nsegments */
3840                                    MJUM16BYTES,         /* maxsegsize */
3841                                    0,                   /* flags */
3842                                    &rxr->ptag))) {
3843                 device_printf(dev, "Unable to create RX DMA tag\n");
3844                 goto fail;
3845         }
3846
3847         for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3848                 rxbuf = &rxr->rx_buffers[i];
3849                 error = bus_dmamap_create(rxr->htag,
3850                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3851                 if (error) {
3852                         device_printf(dev, "Unable to create RX head map\n");
3853                         goto fail;
3854                 }
3855                 error = bus_dmamap_create(rxr->ptag,
3856                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3857                 if (error) {
3858                         device_printf(dev, "Unable to create RX pkt map\n");
3859                         goto fail;
3860                 }
3861         }
3862
3863         return (0);
3864
3865 fail:
3866         /* Frees all, but can handle partial completion */
3867         ixgbe_free_receive_structures(adapter);
3868         return (error);
3869 }
3870
3871 /*
3872 ** Used to detect a descriptor that has
3873 ** been merged by Hardware RSC.
3874 */
3875 static inline u32
3876 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3877 {
3878         return (le32toh(rx->wb.lower.lo_dword.data) &
3879             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3880 }
3881
3882 /*********************************************************************
3883  *
3884  *  Initialize Hardware RSC (LRO) feature on 82599
3885  *  for an RX ring, this is toggled by the LRO capability
3886  *  even though it is transparent to the stack.
3887  *
3888  **********************************************************************/
#if 0	/* NET_LRO */
/* NOTE: currently compiled out — RSC/LRO support is disabled in this port */
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
	struct	adapter		*adapter = rxr->adapter;
	struct	ixgbe_hw	*hw = &adapter->hw;
	u32			rscctrl, rdrxctl;

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
	/* under DEV_NETMAP the if above guards only this statement */
	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	** Limit the total number of descriptors that
	** can be combined, so it does not exceed 64K
	*/
	if (adapter->rx_mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (adapter->rx_mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else  /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
	    IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

	rxr->hw_rsc = TRUE;
}
#endif
3935
3936 static void     
3937 ixgbe_free_receive_ring(struct rx_ring *rxr)
3938
3939         struct  adapter         *adapter;
3940         struct ixgbe_rx_buf       *rxbuf;
3941         int i;
3942
3943         adapter = rxr->adapter;
3944         for (i = 0; i < adapter->num_rx_desc; i++) {
3945                 rxbuf = &rxr->rx_buffers[i];
3946                 if (rxbuf->m_head != NULL) {
3947                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3948                             BUS_DMASYNC_POSTREAD);
3949                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3950                         rxbuf->m_head->m_flags |= M_PKTHDR;
3951                         m_freem(rxbuf->m_head);
3952                 }
3953                 if (rxbuf->m_pack != NULL) {
3954                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3955                             BUS_DMASYNC_POSTREAD);
3956                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3957                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3958                         m_freem(rxbuf->m_pack);
3959                 }
3960                 rxbuf->m_head = NULL;
3961                 rxbuf->m_pack = NULL;
3962         }
3963 }
3964
3965
3966 /*********************************************************************
3967  *
3968  *  Initialize a receive ring and its buffers.
3969  *
3970  **********************************************************************/
3971 static int
3972 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3973 {
3974         struct  adapter         *adapter;
3975         struct ifnet            *ifp;
3976         device_t                dev;
3977         struct ixgbe_rx_buf     *rxbuf;
3978         bus_dma_segment_t       pseg[1], hseg[1];
3979 #if 0   /* NET_LRO */
3980         struct lro_ctrl         *lro = &rxr->lro;
3981 #endif
3982         int                     rsize, nsegs, error = 0;
3983 #ifdef DEV_NETMAP
3984         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3985         struct netmap_slot *slot;
3986 #endif /* DEV_NETMAP */
3987
3988         adapter = rxr->adapter;
3989         ifp = adapter->ifp;
3990         dev = adapter->dev;
3991
3992         /* Clear the ring contents */
3993         IXGBE_RX_LOCK(rxr);
3994 #ifdef DEV_NETMAP
3995         /* same as in ixgbe_setup_transmit_ring() */
3996         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3997 #endif /* DEV_NETMAP */
3998         rsize = roundup2(adapter->num_rx_desc *
3999             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
4000         bzero((void *)rxr->rx_base, rsize);
4001
4002         /* Free current RX buffer structs and their mbufs */
4003         ixgbe_free_receive_ring(rxr);
4004
4005         /* Configure header split? */
4006         if (ixgbe_header_split)
4007         &n