ixgbe: Move sysctl creation to a separate function
[dragonfly.git] / sys / dev / netif / ixgbe / ixgbe.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2012, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD: src/sys/dev/ixgbe/ixgbe.c,v 1.70 2012/07/05 20:51:44 jfv Exp $*/
34
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #include "ixgbe.h"
39
40 /*********************************************************************
41  *  Set this to one to display debug statistics
42  *********************************************************************/
43 int             ixgbe_display_debug_stats = 0;
44
45 /*********************************************************************
46  *  Driver version
47  *********************************************************************/
48 char ixgbe_driver_version[] = "2.4.8";
49
50 /*********************************************************************
51  *  PCI Device ID Table
52  *
53  *  Used by probe to select devices to load on
54  *  Last field stores an index into ixgbe_strings
55  *  Last entry must be all 0s
56  *
57  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
58  *********************************************************************/
59
60 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
61 {
62         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
63         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
64         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
65         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
66         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
67         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
68         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
69         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
70         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
71         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
72         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
73         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
74         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
75         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
76         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
77         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
78         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
79         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
80         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
81         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
82         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
83         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
84         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
85         {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
86         /* required last entry */
87         {0, 0, 0, 0, 0}
88 };
89
90 /*********************************************************************
91  *  Table of branding strings
92  *********************************************************************/
93
94 static char    *ixgbe_strings[] = {
95         "Intel(R) PRO/10GbE PCI-Express Network Driver"
96 };
97
98 /*********************************************************************
99  *  Function prototypes
100  *********************************************************************/
101 static int      ixgbe_probe(device_t);
102 static int      ixgbe_attach(device_t);
103 static int      ixgbe_detach(device_t);
104 static int      ixgbe_shutdown(device_t);
105 static void     ixgbe_start(struct ifnet *);
106 static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
107 #if 0 /* __FreeBSD_version >= 800000 */
108 static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
109 static int      ixgbe_mq_start_locked(struct ifnet *,
110                     struct tx_ring *, struct mbuf *);
111 static void     ixgbe_qflush(struct ifnet *);
112 #endif
113 static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
114 static void     ixgbe_init(void *);
115 static void     ixgbe_init_locked(struct adapter *);
116 static void     ixgbe_stop(void *);
117 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
118 static int      ixgbe_media_change(struct ifnet *);
119 static void     ixgbe_identify_hardware(struct adapter *);
120 static int      ixgbe_allocate_pci_resources(struct adapter *);
121 static int      ixgbe_allocate_msix(struct adapter *);
122 static int      ixgbe_allocate_legacy(struct adapter *);
123 static int      ixgbe_allocate_queues(struct adapter *);
124 static int      ixgbe_setup_msix(struct adapter *);
125 static void     ixgbe_free_pci_resources(struct adapter *);
126 static void     ixgbe_local_timer(void *);
127 static int      ixgbe_setup_interface(device_t, struct adapter *);
128 static void     ixgbe_config_link(struct adapter *);
129
130 static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
131 static int      ixgbe_setup_transmit_structures(struct adapter *);
132 static void     ixgbe_setup_transmit_ring(struct tx_ring *);
133 static void     ixgbe_initialize_transmit_units(struct adapter *);
134 static void     ixgbe_free_transmit_structures(struct adapter *);
135 static void     ixgbe_free_transmit_buffers(struct tx_ring *);
136
137 static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
138 static int      ixgbe_setup_receive_structures(struct adapter *);
139 static int      ixgbe_setup_receive_ring(struct rx_ring *);
140 static void     ixgbe_initialize_receive_units(struct adapter *);
141 static void     ixgbe_free_receive_structures(struct adapter *);
142 static void     ixgbe_free_receive_buffers(struct rx_ring *);
143 #if 0   /* NET_LRO */
144 static void     ixgbe_setup_hw_rsc(struct rx_ring *);
145 #endif
146
147 static void     ixgbe_enable_intr(struct adapter *);
148 static void     ixgbe_disable_intr(struct adapter *);
149 static void     ixgbe_update_stats_counters(struct adapter *);
150 static bool     ixgbe_txeof(struct tx_ring *);
151 static bool     ixgbe_rxeof(struct ix_queue *, int);
152 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
153 static void     ixgbe_set_promisc(struct adapter *);
154 static void     ixgbe_set_multi(struct adapter *);
155 static void     ixgbe_update_link_status(struct adapter *);
156 static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
157 static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
158 static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
159 static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
160 static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
161 static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
162                     struct ixgbe_dma_alloc *, int);
163 static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
164 static void     ixgbe_add_rx_process_limit(struct adapter *, const char *,
165                     const char *, int *, int);
166 static bool     ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *);
167 static bool     ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
168 static int      ixgbe_tso_pullup(struct tx_ring *, struct mbuf **);
169 static void     ixgbe_add_sysctl(struct adapter *);
170 static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
171 static void     ixgbe_configure_ivars(struct adapter *);
172 static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
173
174 static void     ixgbe_setup_vlan_hw_support(struct adapter *);
175 static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
176 static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);
177
178 static void     ixgbe_add_hw_stats(struct adapter *adapter);
179
180 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
181 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
182                     struct mbuf *, u32);
183
184 /* Support for pluggable optic modules */
185 static bool     ixgbe_sfp_probe(struct adapter *);
186 static void     ixgbe_setup_optics(struct adapter *);
187
188 /* Legacy (single vector) interrupt handler */
189 static void     ixgbe_legacy_irq(void *);
190
191 /* The MSI/X Interrupt handlers */
192 static void     ixgbe_msix_que(void *);
193 static void     ixgbe_msix_link(void *);
194
195 /* Deferred interrupt tasklets */
196 static void     ixgbe_handle_que(void *, int);
197 static void     ixgbe_handle_link(void *, int);
198 static void     ixgbe_handle_msf(void *, int);
199 static void     ixgbe_handle_mod(void *, int);
200
201 #ifdef IXGBE_FDIR
202 static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
203 static void     ixgbe_reinit_fdir(void *, int);
204 #endif
205
206 /*********************************************************************
207  *  FreeBSD Device Interface Entry Points
208  *********************************************************************/
209
210 static device_method_t ixgbe_methods[] = {
211         /* Device interface */
212         DEVMETHOD(device_probe, ixgbe_probe),
213         DEVMETHOD(device_attach, ixgbe_attach),
214         DEVMETHOD(device_detach, ixgbe_detach),
215         DEVMETHOD(device_shutdown, ixgbe_shutdown),
216         {0, 0}
217 };
218
219 static driver_t ixgbe_driver = {
220         "ix", ixgbe_methods, sizeof(struct adapter),
221 };
222
223 devclass_t ixgbe_devclass;
224 DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
225
226 MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
227 MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
228
229 /*
230 ** TUNEABLE PARAMETERS:
231 */
232
233 /*
234 ** AIM: Adaptive Interrupt Moderation
235 ** which means that the interrupt rate
236 ** is varied over time based on the
237 ** traffic for that interrupt vector
238 */
239 static int ixgbe_enable_aim = TRUE;
240 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
241
242 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
243 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
244
245 /* How many packets rxeof tries to clean at a time */
246 static int ixgbe_rx_process_limit = 128;
247 TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
248
249 /*
250 ** Smart speed setting, default to on.
251 ** This currently works only as a compile-time
252 ** option, since it is applied during attach; set
253 ** this to 'ixgbe_smart_speed_off' to
254 ** disable.
255 */
256 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
257
258 static int ixgbe_msi_enable = 1;
259 TUNABLE_INT("hw.ixgbe.msi.enable", &ixgbe_msi_enable);
260
261 /*
262  * MSIX should be the default for best performance,
263  * but this allows it to be forced off for testing.
264  */
265 static int ixgbe_enable_msix = 1;
266 TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
267
268 /*
269  * Header split: this causes the hardware to DMA
270  * the header into a separate mbuf from the payload.
271  * It can be a performance win in some workloads, but
272  * in others it actually hurts; it's off by default.
273  */
274 static int ixgbe_header_split = FALSE;
275 TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);
276
277 /*
278  * Number of queues: when set to 0, this
279  * autoconfigures based on the number of
280  * CPUs, with a maximum of 8. It can be
281  * overridden manually here.
282  */
283 static int ixgbe_num_queues = 0;
284 TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
285
286 /*
287 ** Number of TX descriptors per ring,
288 ** set higher than RX as this seems to be
289 ** the better-performing choice.
290 */
291 static int ixgbe_txd = PERFORM_TXD;
292 TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
293
294 /* Number of RX descriptors per ring */
295 static int ixgbe_rxd = PERFORM_RXD;
296 TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
297
298 /* Keep a running count of ports as a sanity check */
299 static int ixgbe_total_ports;
300
301 #ifdef IXGBE_FDIR
302 /*
303 ** For Flow Director: this is the
304 ** number of TX packets we sample
305 ** for the filter pool; it means
306 ** every 20th packet will be probed.
307 **
308 ** This feature can be disabled by
309 ** setting this to 0.
310 */
311 static int atr_sample_rate = 20;
312 /*
313 ** Flow Director actually 'steals'
314 ** part of the packet buffer as its
315 ** filter pool; this variable controls
316 ** how much it uses:
317 **  0 = 64K, 1 = 128K, 2 = 256K
318 */
319 static int fdir_pballoc = 1;
320 #endif
321
322 #ifdef DEV_NETMAP
323 /*
324  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
325  * be a reference on how to implement netmap support in a driver.
326  * Additional comments are in ixgbe_netmap.h .
327  *
328  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
329  * that extend the standard driver.
330  */
331 #include <dev/netmap/ixgbe_netmap.h>
332 #endif /* DEV_NETMAP */
333
334 /*********************************************************************
335  *  Device identification routine
336  *
337  *  ixgbe_probe determines whether the driver should be loaded on an
338  *  adapter based on the PCI vendor/device ID of the adapter.
339  *
340  *  return BUS_PROBE_DEFAULT on success, positive on failure
341  *********************************************************************/
342
343 static int
344 ixgbe_probe(device_t dev)
345 {
346         ixgbe_vendor_info_t *ent;
347
348         u16     pci_vendor_id = 0;
349         u16     pci_device_id = 0;
350         u16     pci_subvendor_id = 0;
351         u16     pci_subdevice_id = 0;
352         char    adapter_name[256];
353
354         INIT_DEBUGOUT("ixgbe_probe: begin");
355
356         pci_vendor_id = pci_get_vendor(dev);
357         if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
358                 return (ENXIO);
359
360         pci_device_id = pci_get_device(dev);
361         pci_subvendor_id = pci_get_subvendor(dev);
362         pci_subdevice_id = pci_get_subdevice(dev);
363
364         ent = ixgbe_vendor_info_array;
365         while (ent->vendor_id != 0) {
366                 if ((pci_vendor_id == ent->vendor_id) &&
367                     (pci_device_id == ent->device_id) &&
368
369                     ((pci_subvendor_id == ent->subvendor_id) ||
370                      (ent->subvendor_id == 0)) &&
371
372                     ((pci_subdevice_id == ent->subdevice_id) ||
373                      (ent->subdevice_id == 0))) {
374                         ksprintf(adapter_name, "%s, Version - %s",
375                                 ixgbe_strings[ent->index],
376                                 ixgbe_driver_version);
377                         device_set_desc_copy(dev, adapter_name);
378                         ++ixgbe_total_ports;
379                         return (BUS_PROBE_DEFAULT);
380                 }
381                 ent++;
382         }
383         return (ENXIO);
384 }
385
386 /*********************************************************************
387  *  Device initialization routine
388  *
389  *  The attach entry point is called when the driver is being loaded.
390  *  This routine identifies the type of hardware, allocates all resources
391  *  and initializes the hardware.
392  *
393  *  return 0 on success, positive on failure
394  *********************************************************************/
395
396 static int
397 ixgbe_attach(device_t dev)
398 {
399         struct adapter *adapter;
400         struct ixgbe_hw *hw;
401         int             error = 0;
402         u16             csum;
403         u32             ctrl_ext;
404
405         INIT_DEBUGOUT("ixgbe_attach: begin");
406
407         if (resource_disabled("ixgbe", device_get_unit(dev))) {
408                 device_printf(dev, "Disabled by device hint\n");
409                 return (ENXIO);
410         }
411
412         /* Allocate, clear, and link in our adapter structure */
413         adapter = device_get_softc(dev);
414         adapter->dev = adapter->osdep.dev = dev;
415         hw = &adapter->hw;
416
417         /* Core Lock Init*/
418         IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
419
420         /* Set up the timer callout */
421         callout_init_mp(&adapter->timer);
422
423         /* Determine hardware revision */
424         ixgbe_identify_hardware(adapter);
425
426         /* Enable bus mastering */
427         pci_enable_busmaster(dev);
428
429         /* Do base PCI setup - map BAR0 */
430         if (ixgbe_allocate_pci_resources(adapter)) {
431                 device_printf(dev, "Allocation of PCI resources failed\n");
432                 error = ENXIO;
433                 goto err_out;
434         }
435
436         /* Do descriptor calc and sanity checks */
437         if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
438             ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
439                 device_printf(dev, "TXD config issue, using default!\n");
440                 adapter->num_tx_desc = DEFAULT_TXD;
441         } else
442                 adapter->num_tx_desc = ixgbe_txd;
443
444         /*
445         ** With many RX rings it is easy to exceed the
446         ** system mbuf allocation. Tuning nmbclusters
447         ** can alleviate this.
448         */
449         if (nmbclusters > 0 ) {
450                 int s;
451                 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
452                 if (s > nmbclusters) {
453                         device_printf(dev, "RX Descriptors exceed "
454                             "system mbuf max, using default instead!\n");
455                         ixgbe_rxd = DEFAULT_RXD;
456                 }
457         }
458
459         if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
460             ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
461                 device_printf(dev, "RXD config issue, using default!\n");
462                 adapter->num_rx_desc = DEFAULT_RXD;
463         } else
464                 adapter->num_rx_desc = ixgbe_rxd;
465
466         /* Allocate our TX/RX Queues */
467         if (ixgbe_allocate_queues(adapter)) {
468                 error = ENOMEM;
469                 goto err_out;
470         }
471
472         /* Allocate multicast array memory. */
473         adapter->mta = kmalloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
474             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
475         if (adapter->mta == NULL) {
476                 device_printf(dev, "Can not allocate multicast setup array\n");
477                 error = ENOMEM;
478                 goto err_late;
479         }
480
481         /* Initialize the shared code */
482         error = ixgbe_init_shared_code(hw);
483         if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
484                 /*
485                 ** No optics in this port, set up
486                 ** so the timer routine will probe 
487                 ** for later insertion.
488                 */
489                 adapter->sfp_probe = TRUE;
490                 error = 0;
491         } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
492                 device_printf(dev,"Unsupported SFP+ module detected!\n");
493                 error = EIO;
494                 goto err_late;
495         } else if (error) {
496                 device_printf(dev,"Unable to initialize the shared code\n");
497                 error = EIO;
498                 goto err_late;
499         }
500
501         /* Make sure we have a good EEPROM before we read from it */
502         if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
503                 device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
504                 error = EIO;
505                 goto err_late;
506         }
507
508         error = ixgbe_init_hw(hw);
509         switch (error) {
510         case IXGBE_ERR_EEPROM_VERSION:
511                 device_printf(dev, "This device is a pre-production adapter/"
512                     "LOM.  Please be aware there may be issues associated "
513                     "with your hardware.\n If you are experiencing problems "
514                     "please contact your Intel or hardware representative "
515                     "who provided you with this hardware.\n");
516                 break;
517         case IXGBE_ERR_SFP_NOT_SUPPORTED:
518                 device_printf(dev,"Unsupported SFP+ Module\n");
519                 error = EIO;
520                 device_printf(dev,"Hardware Initialization Failure\n");
521                 goto err_late;
522         case IXGBE_ERR_SFP_NOT_PRESENT:
523                 device_printf(dev,"No SFP+ Module found\n");
524                 /* falls thru */
525         default:
526                 break;
527         }
528
529         /* Detect and set physical type */
530         ixgbe_setup_optics(adapter);
531
532         if ((adapter->msix > 1) && (ixgbe_enable_msix))
533                 error = ixgbe_allocate_msix(adapter); 
534         else
535                 error = ixgbe_allocate_legacy(adapter); 
536         if (error) 
537                 goto err_late;
538
539         /* Setup OS specific network interface */
540         if (ixgbe_setup_interface(dev, adapter) != 0)
541                 goto err_late;
542
543         /* Add sysctl tree */
544         ixgbe_add_sysctl(adapter);
545
546         /* Initialize statistics */
547         ixgbe_update_stats_counters(adapter);
548
549         /* Register for VLAN events */
550         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
551             ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
552         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
553             ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
554
555         /* Print PCIE bus type/speed/width info */
556         ixgbe_get_bus_info(hw);
557         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
558             ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
559             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
560             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
561             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
562             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
563             ("Unknown"));
564
565         if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
566             (hw->bus.speed == ixgbe_bus_speed_2500)) {
567                 device_printf(dev, "PCI-Express bandwidth available"
568                     " for this card\n     is not sufficient for"
569                     " optimal performance.\n");
570                 device_printf(dev, "For optimal performance a x8 "
571                     "PCIE, or x4 PCIE 2 slot is required.\n");
572         }
573
574         /* let hardware know driver is loaded */
575         ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
576         ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
577         IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
578
579         ixgbe_add_hw_stats(adapter);
580
581 #ifdef DEV_NETMAP
582         ixgbe_netmap_attach(adapter);
583 #endif /* DEV_NETMAP */
584         INIT_DEBUGOUT("ixgbe_attach: end");
585         return (0);
586 err_late:
587         ixgbe_free_transmit_structures(adapter);
588         ixgbe_free_receive_structures(adapter);
589 err_out:
590         if (adapter->ifp != NULL)
591                 if_free(adapter->ifp);
592         ixgbe_free_pci_resources(adapter);
593         kfree(adapter->mta, M_DEVBUF);
594         return (error);
595
596 }
597
598 /*********************************************************************
599  *  Device removal routine
600  *
601  *  The detach entry point is called when the driver is being removed.
602  *  This routine stops the adapter and deallocates all the resources
603  *  that were allocated for driver operation.
604  *
605  *  return 0 on success, positive on failure
606  *********************************************************************/
607
608 static int
609 ixgbe_detach(device_t dev)
610 {
611         struct adapter *adapter = device_get_softc(dev);
612         struct ix_queue *que = adapter->queues;
613         u32     ctrl_ext;
614
615         INIT_DEBUGOUT("ixgbe_detach: begin");
616
617         /* Make sure VLANS are not using driver */
618         if (adapter->ifp->if_vlantrunks != NULL) {
619                 device_printf(dev,"Vlan in use, detach first\n");
620                 return (EBUSY);
621         }
622
623         IXGBE_CORE_LOCK(adapter);
624         ixgbe_stop(adapter);
625         IXGBE_CORE_UNLOCK(adapter);
626
627         for (int i = 0; i < adapter->num_queues; i++, que++) {
628                 if (que->tq) {
629                         taskqueue_drain(que->tq, &que->que_task);
630                         taskqueue_free(que->tq);
631                 }
632         }
633
634         /* Drain the Link queue */
635         if (adapter->tq) {
636                 taskqueue_drain(adapter->tq, &adapter->link_task);
637                 taskqueue_drain(adapter->tq, &adapter->mod_task);
638                 taskqueue_drain(adapter->tq, &adapter->msf_task);
639 #ifdef IXGBE_FDIR
640                 taskqueue_drain(adapter->tq, &adapter->fdir_task);
641 #endif
642                 taskqueue_free(adapter->tq);
643         }
644
645         /* let hardware know driver is unloading */
646         ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
647         ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
648         IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
649
650         /* Unregister VLAN events */
651         if (adapter->vlan_attach != NULL)
652                 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
653         if (adapter->vlan_detach != NULL)
654                 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
655
656         ether_ifdetach(adapter->ifp);
657         callout_stop(&adapter->timer);
658 #ifdef DEV_NETMAP
659         netmap_detach(adapter->ifp);
660 #endif /* DEV_NETMAP */
661         ixgbe_free_pci_resources(adapter);
662         bus_generic_detach(dev);
663         if_free(adapter->ifp);
664
665         ixgbe_free_transmit_structures(adapter);
666         ixgbe_free_receive_structures(adapter);
667         kfree(adapter->mta, M_DEVBUF);
668         sysctl_ctx_free(&adapter->sysctl_ctx);
669         
670         IXGBE_CORE_LOCK_DESTROY(adapter);
671         return (0);
672 }
673
674 /*********************************************************************
675  *
676  *  Shutdown entry point
677  *
678  **********************************************************************/
679
680 static int
681 ixgbe_shutdown(device_t dev)
682 {
683         struct adapter *adapter = device_get_softc(dev);
684         IXGBE_CORE_LOCK(adapter);
685         ixgbe_stop(adapter);
686         IXGBE_CORE_UNLOCK(adapter);
687         return (0);
688 }
689
690
691 /*********************************************************************
692  *  Transmit entry point
693  *
694  *  ixgbe_start is called by the stack to initiate a transmit.
695  *  The driver will remain in this routine as long as there are
696  *  packets to transmit and transmit resources are available.
697  *  If resources are not available, the stack is notified and
698  *  the packet is requeued.
699  **********************************************************************/
700
701 static void
702 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
703 {
704         struct mbuf    *m_head;
705         struct adapter *adapter = txr->adapter;
706
707         IXGBE_TX_LOCK_ASSERT(txr);
708
709         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
710                 return;
711         if (!adapter->link_active)
712                 return;
713
714         while (!ifq_is_empty(&ifp->if_snd)) {
715                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) {
716                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
717                         break;
718                 }
719
720                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
721                 if (m_head == NULL)
722                         break;
723
724                 if (ixgbe_xmit(txr, &m_head)) {
725 #if 0 /* XXX: prepend to an ALTQ queue ? */
726                         if (m_head != NULL)
727                                 IF_PREPEND(&ifp->if_snd, m_head);
728 #endif
729                         if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
730                                 txr->queue_status |= IXGBE_QUEUE_DEPLETED;
731                         break;
732                 }
733                 /* Send a copy of the frame to the BPF listener */
734                 ETHER_BPF_MTAP(ifp, m_head);
735
736                 /* Set watchdog on */
737                 txr->watchdog_time = ticks;
738                 txr->queue_status = IXGBE_QUEUE_WORKING;
739
740         }
741         return;
742 }
743
744 /*
745  * Legacy TX start - called by the stack; this
746  * always uses the first tx ring and should
747  * not be used with multiqueue tx enabled.
748  */
749 static void
750 ixgbe_start(struct ifnet *ifp)
751 {
752         struct adapter *adapter = ifp->if_softc;
753         struct tx_ring  *txr = adapter->tx_rings;
754
755         if (ifp->if_flags & IFF_RUNNING) {
756                 IXGBE_TX_LOCK(txr);
757                 ixgbe_start_locked(txr, ifp);
758                 IXGBE_TX_UNLOCK(txr);
759         }
760         return;
761 }
762
763 #if 0 /* __FreeBSD_version >= 800000 */
764 /*
765 ** Multiqueue Transmit driver
766 **
767 */
768 static int
769 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
770 {
771         struct adapter  *adapter = ifp->if_softc;
772         struct ix_queue *que;
773         struct tx_ring  *txr;
774         int             i = 0, err = 0;
775
776         /* Which queue to use */
777         if ((m->m_flags & M_FLOWID) != 0)
778                 i = m->m_pkthdr.flowid % adapter->num_queues;
779         else
780                 i = curcpu % adapter->num_queues;
781
782         txr = &adapter->tx_rings[i];
783         que = &adapter->queues[i];
784
785         if (((txr->queue_status & IXGBE_QUEUE_DEPLETED) == 0) &&
786             IXGBE_TX_TRYLOCK(txr)) {
787                 err = ixgbe_mq_start_locked(ifp, txr, m);
788                 IXGBE_TX_UNLOCK(txr);
789         } else {
790                 err = drbr_enqueue(ifp, txr->br, m);
791                 taskqueue_enqueue(que->tq, &que->que_task);
792         }
793
794         return (err);
795 }
796
797 static int
798 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
799 {
800         struct adapter  *adapter = txr->adapter;
801         struct mbuf     *next;
802         int             enqueued, err = 0;
803
804         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
805             (txr->queue_status == IXGBE_QUEUE_DEPLETED) ||
806             adapter->link_active == 0) {
807                 if (m != NULL)
808                         err = drbr_enqueue(ifp, txr->br, m);
809                 return (err);
810         }
811
812         enqueued = 0;
813         if (m == NULL) {
814                 next = drbr_dequeue(ifp, txr->br);
815         } else if (drbr_needs_enqueue(ifp, txr->br)) {
816                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
817                         return (err);
818                 next = drbr_dequeue(ifp, txr->br);
819         } else
820                 next = m;
821
822         /* Process the queue */
823         while (next != NULL) {
824                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
825                         if (next != NULL)
826                                 err = drbr_enqueue(ifp, txr->br, next);
827                         break;
828                 }
829                 enqueued++;
830                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
831                 /* Send a copy of the frame to the BPF listener */
832                 ETHER_BPF_MTAP(ifp, next);
833                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
834                         break;
835                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
836                         ixgbe_txeof(txr);
837                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD) {
838                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
839                         break;
840                 }
841                 next = drbr_dequeue(ifp, txr->br);
842         }
843
844         if (enqueued > 0) {
845                 /* Set watchdog on */
846                 txr->queue_status |= IXGBE_QUEUE_WORKING;
847                 txr->watchdog_time = ticks;
848         }
849
850         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
851                 ixgbe_txeof(txr);
852
853         return (err);
854 }
855
856 /*
857 ** Flush all ring buffers
858 */
859 static void
860 ixgbe_qflush(struct ifnet *ifp)
861 {
862         struct adapter  *adapter = ifp->if_softc;
863         struct tx_ring  *txr = adapter->tx_rings;
864         struct mbuf     *m;
865
866         for (int i = 0; i < adapter->num_queues; i++, txr++) {
867                 IXGBE_TX_LOCK(txr);
868                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
869                         m_freem(m);
870                 IXGBE_TX_UNLOCK(txr);
871         }
872         if_qflush(ifp);
873 }
874 #endif /* __FreeBSD_version >= 800000 */
875
876 /*********************************************************************
877  *  Ioctl entry point
878  *
879  *  ixgbe_ioctl is called when the user wants to configure the
880  *  interface.
881  *
882  *  return 0 on success, positive on failure
883  **********************************************************************/
884
885 static int
886 ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
887 {
888         struct adapter  *adapter = ifp->if_softc;
889         struct ifreq    *ifr = (struct ifreq *) data;
890 #if defined(INET) || defined(INET6)
891         struct ifaddr *ifa = (struct ifaddr *)data;
892         bool            avoid_reset = FALSE;
893 #endif
894         int             error = 0;
895
896         switch (command) {
897
898         case SIOCSIFADDR:
899 #ifdef INET
900                 if (ifa->ifa_addr->sa_family == AF_INET)
901                         avoid_reset = TRUE;
902 #endif
903 #ifdef INET6
904                 if (ifa->ifa_addr->sa_family == AF_INET6)
905                         avoid_reset = TRUE;
906 #endif
907 #if defined(INET) || defined(INET6)
908                 /*
909                 ** Calling init results in link renegotiation,
910                 ** so we avoid doing it when possible.
911                 */
912                 if (avoid_reset) {
913                         ifp->if_flags |= IFF_UP;
914                         if (!(ifp->if_flags & IFF_RUNNING))
915                                 ixgbe_init(adapter);
916                         if (!(ifp->if_flags & IFF_NOARP))
917                                 arp_ifinit(ifp, ifa);
918                 } else
919                         error = ether_ioctl(ifp, command, data);
920 #endif
921                 break;
922         case SIOCSIFMTU:
923                 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
924                 if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
925                         error = EINVAL;
926                 } else {
927                         IXGBE_CORE_LOCK(adapter);
928                         ifp->if_mtu = ifr->ifr_mtu;
929                         adapter->max_frame_size =
930                                 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
931                         ixgbe_init_locked(adapter);
932                         IXGBE_CORE_UNLOCK(adapter);
933                 }
934                 break;
935         case SIOCSIFFLAGS:
936                 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
937                 IXGBE_CORE_LOCK(adapter);
938                 if (ifp->if_flags & IFF_UP) {
939                         if ((ifp->if_flags & IFF_RUNNING)) {
940                                 if ((ifp->if_flags ^ adapter->if_flags) &
941                                     (IFF_PROMISC | IFF_ALLMULTI)) {
942                                         ixgbe_set_promisc(adapter);
943                                 }
944                         } else
945                                 ixgbe_init_locked(adapter);
946                 } else
947                         if (ifp->if_flags & IFF_RUNNING)
948                                 ixgbe_stop(adapter);
949                 adapter->if_flags = ifp->if_flags;
950                 IXGBE_CORE_UNLOCK(adapter);
951                 break;
952         case SIOCADDMULTI:
953         case SIOCDELMULTI:
954                 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
955                 if (ifp->if_flags & IFF_RUNNING) {
956                         IXGBE_CORE_LOCK(adapter);
957                         ixgbe_disable_intr(adapter);
958                         ixgbe_set_multi(adapter);
959                         ixgbe_enable_intr(adapter);
960                         IXGBE_CORE_UNLOCK(adapter);
961                 }
962                 break;
963         case SIOCSIFMEDIA:
964         case SIOCGIFMEDIA:
965                 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
966                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
967                 break;
968         case SIOCSIFCAP:
969         {
970                 int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
971                 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
972                 if (mask & IFCAP_HWCSUM)
973                         ifp->if_capenable ^= IFCAP_HWCSUM;
974                 if (mask & IFCAP_TSO4)
975                         ifp->if_capenable ^= IFCAP_TSO4;
976                 if (mask & IFCAP_TSO6)
977                         ifp->if_capenable ^= IFCAP_TSO6;
978 #if 0 /* NET_LRO */
979                 if (mask & IFCAP_LRO)
980                         ifp->if_capenable ^= IFCAP_LRO;
981 #endif
982                 if (mask & IFCAP_VLAN_HWTAGGING)
983                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
984                 if (mask & IFCAP_VLAN_HWFILTER)
985                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
986 #if 0 /* NET_TSO */
987                 if (mask & IFCAP_VLAN_HWTSO)
988                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
989 #endif
990                 if (ifp->if_flags & IFF_RUNNING) {
991                         IXGBE_CORE_LOCK(adapter);
992                         ixgbe_init_locked(adapter);
993                         IXGBE_CORE_UNLOCK(adapter);
994                 }
995 #if 0
996                 VLAN_CAPABILITIES(ifp);
997 #endif
998                 break;
999         }
1000
1001         default:
1002                 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1003                 error = ether_ioctl(ifp, command, data);
1004                 break;
1005         }
1006
1007         return (error);
1008 }
1009
1010 /*********************************************************************
1011  *  Init entry point
1012  *
1013  *  This routine is used in two ways. It is used by the stack as
1014  *  the init entry point in the network interface structure. It is also used
1015  *  by the driver as a hw/sw initialization routine to get to a
1016  *  consistent state.
1017  *
1018  *  return 0 on success, positive on failure
1019  **********************************************************************/
1020 #define IXGBE_MHADD_MFS_SHIFT 16
1021
1022 static void
1023 ixgbe_init_locked(struct adapter *adapter)
1024 {
1025         struct ifnet   *ifp = adapter->ifp;
1026         device_t        dev = adapter->dev;
1027         struct ixgbe_hw *hw = &adapter->hw;
1028         u32             k, txdctl, mhadd, gpie;
1029         u32             rxdctl, rxctrl;
1030
1031         KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
1032         INIT_DEBUGOUT("ixgbe_init: begin");
1033         hw->adapter_stopped = FALSE;
1034         ixgbe_stop_adapter(hw);
1035         callout_stop(&adapter->timer);
1036
1037         /* reprogram the RAR[0] in case user changed it. */
1038         ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1039
1040         /* Get the latest mac address, User can use a LAA */
1041         bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
1042               IXGBE_ETH_LENGTH_OF_ADDRESS);
1043         ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1044         hw->addr_ctrl.rar_used_count = 1;
1045
1046         /* Set the various hardware offload abilities */
1047         ifp->if_hwassist = 0;
1048         if (ifp->if_capenable & IFCAP_TSO)
1049                 ifp->if_hwassist |= CSUM_TSO;
1050         if (ifp->if_capenable & IFCAP_TXCSUM) {
1051                 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1052 #if 0
1053                 if (hw->mac.type != ixgbe_mac_82598EB)
1054                         ifp->if_hwassist |= CSUM_SCTP;
1055 #endif
1056         }
1057
1058         /* Prepare transmit descriptors and buffers */
1059         if (ixgbe_setup_transmit_structures(adapter)) {
1060                 device_printf(dev,"Could not setup transmit structures\n");
1061                 ixgbe_stop(adapter);
1062                 return;
1063         }
1064
1065         ixgbe_init_hw(hw);
1066         ixgbe_initialize_transmit_units(adapter);
1067
1068         /* Setup Multicast table */
1069         ixgbe_set_multi(adapter);
1070
1071         /*
1072         ** Determine the correct mbuf pool
1073         ** for doing jumbo/headersplit
1074         */
1075         if (adapter->max_frame_size <= 2048)
1076                 adapter->rx_mbuf_sz = MCLBYTES;
1077         else if (adapter->max_frame_size <= 4096)
1078                 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1079         else if (adapter->max_frame_size <= 9216)
1080                 adapter->rx_mbuf_sz = MJUM9BYTES;
1081         else
1082                 adapter->rx_mbuf_sz = MJUM16BYTES;
1083
1084         /* Prepare receive descriptors and buffers */
1085         if (ixgbe_setup_receive_structures(adapter)) {
1086                 device_printf(dev,"Could not setup receive structures\n");
1087                 ixgbe_stop(adapter);
1088                 return;
1089         }
1090
1091         /* Configure RX settings */
1092         ixgbe_initialize_receive_units(adapter);
1093
1094         gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1095
1096         /* Enable Fan Failure Interrupt */
1097         gpie |= IXGBE_SDP1_GPIEN;
1098
1099         /* Add for Module detection */
1100         if (hw->mac.type == ixgbe_mac_82599EB)
1101                 gpie |= IXGBE_SDP2_GPIEN;
1102
1103         /* Thermal Failure Detection */
1104         if (hw->mac.type == ixgbe_mac_X540)
1105                 gpie |= IXGBE_SDP0_GPIEN;
1106
1107         if (adapter->msix > 1) {
1108                 /* Enable Enhanced MSIX mode */
1109                 gpie |= IXGBE_GPIE_MSIX_MODE;
1110                 gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1111                     IXGBE_GPIE_OCD;
1112         }
1113         IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1114
1115         /* Set MTU size */
1116         if (ifp->if_mtu > ETHERMTU) {
1117                 mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1118                 mhadd &= ~IXGBE_MHADD_MFS_MASK;
1119                 mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1120                 IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1121         }
1122         
1123         /* Now enable all the queues */
1124
1125         for (int i = 0; i < adapter->num_queues; i++) {
1126                 txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1127                 txdctl |= IXGBE_TXDCTL_ENABLE;
1128                 /* Set WTHRESH to 8, burst writeback */
1129                 txdctl |= (8 << 16);
1130                 /*
1131                  * When the internal queue falls below PTHRESH (32),
1132                  * start prefetching as long as there are at least
1133                  * HTHRESH (1) buffers ready. The values are taken
1134                  * from the Intel linux driver 3.8.21.
1135                  * Prefetching enables tx line rate even with 1 queue.
1136                  */
1137                 txdctl |= (32 << 0) | (1 << 8);
1138                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1139         }
1140
1141         for (int i = 0; i < adapter->num_queues; i++) {
1142                 rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1143                 if (hw->mac.type == ixgbe_mac_82598EB) {
1144                         /*
1145                         ** PTHRESH = 21
1146                         ** HTHRESH = 4
1147                         ** WTHRESH = 8
1148                         */
1149                         rxdctl &= ~0x3FFFFF;
1150                         rxdctl |= 0x080420;
1151                 }
1152                 rxdctl |= IXGBE_RXDCTL_ENABLE;
1153                 IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1154                 for (k = 0; k < 10; k++) {
1155                         if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1156                             IXGBE_RXDCTL_ENABLE)
1157                                 break;
1158                         else
1159                                 msec_delay(1);
1160                 }
1161                 wmb();
1162 #ifdef DEV_NETMAP
1163                 /*
1164                  * In netmap mode, we must preserve the buffers made
1165                  * available to userspace before the if_init()
1166                  * (this is true by default on the TX side, because
1167                  * init makes all buffers available to userspace).
1168                  *
1169                  * netmap_reset() and the device specific routines
1170                  * (e.g. ixgbe_setup_receive_rings()) map these
1171                  * buffers at the end of the NIC ring, so here we
1172                  * must set the RDT (tail) register to make sure
1173                  * they are not overwritten.
1174                  *
1175                  * In this driver the NIC ring starts at RDH = 0,
1176                  * RDT points to the last slot available for reception (?),
1177                  * so RDT = num_rx_desc - 1 means the whole ring is available.
1178                  */
1179                 if (ifp->if_capenable & IFCAP_NETMAP) {
1180                         struct netmap_adapter *na = NA(adapter->ifp);
1181                         struct netmap_kring *kring = &na->rx_rings[i];
1182                         int t = na->num_rx_desc - 1 - kring->nr_hwavail;
1183
1184                         IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1185                 } else
1186 #endif /* DEV_NETMAP */
1187                 IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1188         }
1189
1190         /* Set up VLAN support and filter */
1191         ixgbe_setup_vlan_hw_support(adapter);
1192
1193         /* Enable Receive engine */
1194         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1195         if (hw->mac.type == ixgbe_mac_82598EB)
1196                 rxctrl |= IXGBE_RXCTRL_DMBYPS;
1197         rxctrl |= IXGBE_RXCTRL_RXEN;
1198         ixgbe_enable_rx_dma(hw, rxctrl);
1199
1200         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1201
1202         /* Set up MSI/X routing */
1203         if (ixgbe_enable_msix)  {
1204                 ixgbe_configure_ivars(adapter);
1205                 /* Set up auto-mask */
1206                 if (hw->mac.type == ixgbe_mac_82598EB)
1207                         IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1208                 else {
1209                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1210                         IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1211                 }
1212         } else {  /* Simple settings for Legacy/MSI */
1213                 ixgbe_set_ivar(adapter, 0, 0, 0);
1214                 ixgbe_set_ivar(adapter, 0, 0, 1);
1215                 IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1216         }
1217
1218 #ifdef IXGBE_FDIR
1219         /* Init Flow director */
1220         if (hw->mac.type != ixgbe_mac_82598EB) {
1221                 u32 hdrm = 32 << fdir_pballoc;
1222
1223                 hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1224                 ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1225         }
1226 #endif
1227
1228         /*
1229         ** Check on any SFP devices that
1230         ** need to be kick-started
1231         */
1232         if (hw->phy.type == ixgbe_phy_none) {
1233                 int err = hw->phy.ops.identify(hw);
1234                 if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1235                         device_printf(dev,
1236                             "Unsupported SFP+ module type was detected.\n");
1237                         return;
1238                 }
1239         }
1240
1241         /* Set moderation on the Link interrupt */
1242         IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1243
1244         /* Config/Enable Link */
1245         ixgbe_config_link(adapter);
1246
1247         /* Hardware Packet Buffer & Flow Control setup */
1248         {
1249                 u32 rxpb, frame, size, tmp;
1250
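                /*
                 * Compute the flow control watermarks: the delay value
                 * (DV) for one maximum-sized frame is converted to KB
                 * with IXGBE_BT2KB() and subtracted from the RX packet
                 * buffer size (RXPBSIZE, scaled to KB) to form the high
                 * water mark; the low water mark is the low DV in KB.
                 */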
1251                 frame = adapter->max_frame_size;
1252
1253                 /* Calculate High Water */
1254                 if (hw->mac.type == ixgbe_mac_X540)
1255                         tmp = IXGBE_DV_X540(frame, frame);
1256                 else
1257                         tmp = IXGBE_DV(frame, frame);
1258                 size = IXGBE_BT2KB(tmp);
1259                 rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1260                 hw->fc.high_water[0] = rxpb - size;
1261
1262                 /* Now calculate Low Water */
1263                 if (hw->mac.type == ixgbe_mac_X540)
1264                         tmp = IXGBE_LOW_DV_X540(frame);
1265                 else
1266                         tmp = IXGBE_LOW_DV(frame);
1267                 hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1268                 
1269                 adapter->fc = hw->fc.requested_mode = ixgbe_fc_full;
1270                 hw->fc.pause_time = IXGBE_FC_PAUSE;
1271                 hw->fc.send_xon = TRUE;
1272         }
1273         /* Initialize the FC settings */
1274         ixgbe_start_hw(hw);
1275
1276         /* And now turn on interrupts */
1277         ixgbe_enable_intr(adapter);
1278
1279         /* Now inform the stack we're ready */
1280         ifp->if_flags |= IFF_RUNNING;
1281         ifp->if_flags &= ~IFF_OACTIVE;
1282
1283         return;
1284 }
1285
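/* Locking wrapper around ixgbe_init_locked() */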
1286 static void
1287 ixgbe_init(void *arg)
1288 {
1289         struct adapter *adapter = arg;
1290
1291         IXGBE_CORE_LOCK(adapter);
1292         ixgbe_init_locked(adapter);
1293         IXGBE_CORE_UNLOCK(adapter);
1294         return;
1295 }
1296
1297
1298 /*
1299 **
1300 ** MSIX Interrupt Handlers and Tasklets
1301 **
1302 */
1303
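/*
** ixgbe_enable_queue/ixgbe_disable_queue: unmask or mask the interrupt
** for a single MSI-X vector. The 82598 keeps all queue bits in EIMS/EIMC,
** while 82599/X540 spread the 64 possible queue bits across the
** EIMS_EX/EIMC_EX register pairs.
*/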
1304 static inline void
1305 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1306 {
1307         struct ixgbe_hw *hw = &adapter->hw;
1308         u64     queue = ((u64)1 << vector);
1309         u32     mask;
1310
1311         if (hw->mac.type == ixgbe_mac_82598EB) {
1312                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1313                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1314         } else {
1315                 mask = (queue & 0xFFFFFFFF);
1316                 if (mask)
1317                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1318                 mask = (queue >> 32);
1319                 if (mask)
1320                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1321         }
1322 }
1323
1324 static inline void
1325 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1326 {
1327         struct ixgbe_hw *hw = &adapter->hw;
1328         u64     queue = ((u64)1 << vector);
1329         u32     mask;
1330
1331         if (hw->mac.type == ixgbe_mac_82598EB) {
1332                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1333                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1334         } else {
1335                 mask = (queue & 0xFFFFFFFF);
1336                 if (mask)
1337                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1338                 mask = (queue >> 32);
1339                 if (mask)
1340                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1341         }
1342 }
1343
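/*
** ixgbe_rearm_queues: write the queue bitmask to EICS (EICS_EX on
** 82599/X540) to trigger interrupts for those queues in software.
*/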
1344 static inline void
1345 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1346 {
1347         u32 mask;
1348
1349         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1350                 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1351                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1352         } else {
1353                 mask = (queues & 0xFFFFFFFF);
1354                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1355                 mask = (queues >> 32);
1356                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1357         }
1358 }
1359
1360
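/*
** Deferred (taskqueue) handler for a queue: clean received packets up to
** the rx_process_limit, reap completed transmit descriptors, restart
** transmission if the send queue is not empty, and either reschedule
** itself (if RX work remains) or re-enable the queue interrupt.
*/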
1361 static void
1362 ixgbe_handle_que(void *context, int pending)
1363 {
1364         struct ix_queue *que = context;
1365         struct adapter  *adapter = que->adapter;
1366         struct tx_ring  *txr = que->txr;
1367         struct ifnet    *ifp = adapter->ifp;
1368         bool            more;
1369
1370         if (ifp->if_flags & IFF_RUNNING) {
1371                 more = ixgbe_rxeof(que, adapter->rx_process_limit);
1372                 IXGBE_TX_LOCK(txr);
1373                 ixgbe_txeof(txr);
1374 #if 0 /*__FreeBSD_version >= 800000*/
1375                 if (!drbr_empty(ifp, txr->br))
1376                         ixgbe_mq_start_locked(ifp, txr, NULL);
1377 #else
1378                 if (!ifq_is_empty(&ifp->if_snd))
1379                         ixgbe_start_locked(txr, ifp);
1380 #endif
1381                 IXGBE_TX_UNLOCK(txr);
1382                 if (more) {
1383                         taskqueue_enqueue(que->tq, &que->que_task);
1384                         return;
1385                 }
1386         }
1387
1388         /* Reenable this interrupt */
1389         ixgbe_enable_queue(adapter, que->msix);
1390         return;
1391 }
1392
1393
1394 /*********************************************************************
1395  *
1396  *  Legacy Interrupt Service routine
1397  *
1398  **********************************************************************/
1399
1400 static void
1401 ixgbe_legacy_irq(void *arg)
1402 {
1403         struct ix_queue *que = arg;
1404         struct adapter  *adapter = que->adapter;
1405         struct ixgbe_hw *hw = &adapter->hw;
1406         struct          tx_ring *txr = adapter->tx_rings;
1407         bool            more_tx, more_rx;
1408         u32             reg_eicr, loop = MAX_LOOP;
1409
1410
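        /* EICR is read-to-clear: this returns and acknowledges the causes */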
1411         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1412
1413         ++que->irqs;
1414         if (reg_eicr == 0) {
1415                 ixgbe_enable_intr(adapter);
1416                 return;
1417         }
1418
1419         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
1420
1421         IXGBE_TX_LOCK(txr);
1422         do {
1423                 more_tx = ixgbe_txeof(txr);
1424         } while (loop-- && more_tx);
1425         IXGBE_TX_UNLOCK(txr);
1426
1427         if (more_rx || more_tx)
1428                 taskqueue_enqueue(que->tq, &que->que_task);
1429
1430         /* Check for fan failure */
1431         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1432             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1433                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1434                     "REPLACE IMMEDIATELY!!\n");
1435                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1436         }
1437
1438         /* Link status change */
1439         if (reg_eicr & IXGBE_EICR_LSC)
1440                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1441
1442         ixgbe_enable_intr(adapter);
1443         return;
1444 }
1445
1446
1447 /*********************************************************************
1448  *
1449  *  MSIX Queue Interrupt Service routine
1450  *
1451  **********************************************************************/
1452 void
1453 ixgbe_msix_que(void *arg)
1454 {
1455         struct ix_queue *que = arg;
1456         struct adapter  *adapter = que->adapter;
1457         struct tx_ring  *txr = que->txr;
1458         struct rx_ring  *rxr = que->rxr;
1459         bool            more_tx, more_rx;
1460         u32             newitr = 0;
1461
1462         ixgbe_disable_queue(adapter, que->msix);
1463         ++que->irqs;
1464
1465         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
1466
1467         IXGBE_TX_LOCK(txr);
1468         more_tx = ixgbe_txeof(txr);
1469         /*
1470         ** Make certain that if the stack 
1471         ** has anything queued the task gets
1472         ** scheduled to handle it.
1473         */
1474 #if 0
1475 #if __FreeBSD_version < 800000
1476         if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
1477 #else
1478         if (!drbr_empty(adapter->ifp, txr->br))
1479 #endif
1480 #endif
1481         if (!ifq_is_empty(&adapter->ifp->if_snd))
1482                 more_tx = 1;
1483         IXGBE_TX_UNLOCK(txr);
1484
1485         /* Do AIM now? */
1486
1487         if (ixgbe_enable_aim == FALSE)
1488                 goto no_calc;
1489         /*
1490         ** Do Adaptive Interrupt Moderation:
1491         **  - Write out last calculated setting
1492         **  - Calculate based on average size over
1493         **    the last interval.
1494         */
1495         if (que->eitr_setting)
1496                 IXGBE_WRITE_REG(&adapter->hw,
1497                     IXGBE_EITR(que->msix), que->eitr_setting);
1498  
1499         que->eitr_setting = 0;
1500
1501         /* Idle, do nothing */
1502         if ((txr->bytes == 0) && (rxr->bytes == 0))
1503                 goto no_calc;
1504                                 
1505         if ((txr->bytes) && (txr->packets))
1506                 newitr = txr->bytes/txr->packets;
1507         if ((rxr->bytes) && (rxr->packets))
1508                 newitr = max(newitr,
1509                     (rxr->bytes / rxr->packets));
1510         newitr += 24; /* account for hardware frame, crc */
1511
1512         /* set an upper boundary */
1513         newitr = min(newitr, 3000);
1514
1515         /* Be nice to the mid range */
1516         if ((newitr > 300) && (newitr < 1200))
1517                 newitr = (newitr / 3);
1518         else
1519                 newitr = (newitr / 2);
1520
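        /*
        ** 82598 takes the interval in both halves of EITR; newer
        ** MACs keep it in the low bits and need the counter
        ** write-disable bit set so the write does not clear the timer.
        */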
1521         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1522                 newitr |= newitr << 16;
1523         else
1524                 newitr |= IXGBE_EITR_CNT_WDIS;
1525                  
1526         /* save for next interrupt */
1527         que->eitr_setting = newitr;
1528
1529         /* Reset state */
1530         txr->bytes = 0;
1531         txr->packets = 0;
1532         rxr->bytes = 0;
1533         rxr->packets = 0;
1534
1535 no_calc:
1536         if (more_tx || more_rx)
1537                 taskqueue_enqueue(que->tq, &que->que_task);
1538         else /* Reenable this interrupt */
1539                 ixgbe_enable_queue(adapter, que->msix);
1540         return;
1541 }
1542
1543
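/*********************************************************************
 *
 *  MSIX Link ("other causes") Interrupt Service routine
 *
 **********************************************************************/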
1544 static void
1545 ixgbe_msix_link(void *arg)
1546 {
1547         struct adapter  *adapter = arg;
1548         struct ixgbe_hw *hw = &adapter->hw;
1549         u32             reg_eicr;
1550
1551         ++adapter->link_irq;
1552
1553         /* First get the cause */
1554         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1555         /* Clear interrupt with write */
1556         IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1557
1558         /* Link status change */
1559         if (reg_eicr & IXGBE_EICR_LSC)
1560                 taskqueue_enqueue(adapter->tq, &adapter->link_task);
1561
1562         if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1563 #ifdef IXGBE_FDIR
1564                 if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1565                         /* This is probably overkill :) */
1566                         if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1567                                 return;
1568                         /* Disable the interrupt */
1569                         IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1570                         taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1571                 } else
1572 #endif
1573                 if (reg_eicr & IXGBE_EICR_ECC) {
1574                         device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1575                             "Please Reboot!!\n");
1576                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1577                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1580                         /* Clear the interrupt */
1581                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1582                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1583                 } else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1584                         /* Clear the interrupt */
1585                         IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1586                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1587                 }
1588         } 
1589
1590         /* Check for fan failure */
1591         if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1592             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1593                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1594                     "REPLACE IMMEDIATELY!!\n");
1595                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1596         }
1597
1598         /* Check for over temp condition */
1599         if ((hw->mac.type == ixgbe_mac_X540) &&
1600             (reg_eicr & IXGBE_EICR_GPI_SDP0)) {
1601                 device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1602                     "PHY IS SHUT DOWN!!\n");
1603                 device_printf(adapter->dev, "System shutdown required\n");
1604                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0);
1605         }
1606
1607         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1608         return;
1609 }
1610
1611 /*********************************************************************
1612  *
1613  *  Media Ioctl callback
1614  *
1615  *  This routine is called whenever the user queries the status of
1616  *  the interface using ifconfig.
1617  *
1618  **********************************************************************/
1619 static void
1620 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1621 {
1622         struct adapter *adapter = ifp->if_softc;
1623
1624         INIT_DEBUGOUT("ixgbe_media_status: begin");
1625         IXGBE_CORE_LOCK(adapter);
1626         ixgbe_update_link_status(adapter);
1627
1628         ifmr->ifm_status = IFM_AVALID;
1629         ifmr->ifm_active = IFM_ETHER;
1630
1631         if (!adapter->link_active) {
1632                 IXGBE_CORE_UNLOCK(adapter);
1633                 return;
1634         }
1635
1636         ifmr->ifm_status |= IFM_ACTIVE;
1637
1638         switch (adapter->link_speed) {
1639                 case IXGBE_LINK_SPEED_100_FULL:
1640                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1641                         break;
1642                 case IXGBE_LINK_SPEED_1GB_FULL:
1643                         ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1644                         break;
1645                 case IXGBE_LINK_SPEED_10GB_FULL:
1646                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1647                         break;
1648         }
1649
1650         IXGBE_CORE_UNLOCK(adapter);
1651
1652         return;
1653 }
1654
1655 /*********************************************************************
1656  *
1657  *  Media Ioctl callback
1658  *
1659  *  This routine is called when the user changes speed/duplex using
1660  *  media/mediaopt option with ifconfig.
1661  *
1662  **********************************************************************/
1663 static int
1664 ixgbe_media_change(struct ifnet * ifp)
1665 {
1666         struct adapter *adapter = ifp->if_softc;
1667         struct ifmedia *ifm = &adapter->media;
1668
1669         INIT_DEBUGOUT("ixgbe_media_change: begin");
1670
1671         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1672                 return (EINVAL);
1673
1674         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1675         case IFM_AUTO:
1676                 adapter->hw.phy.autoneg_advertised =
1677                     IXGBE_LINK_SPEED_100_FULL |
1678                     IXGBE_LINK_SPEED_1GB_FULL |
1679                     IXGBE_LINK_SPEED_10GB_FULL;
1680                 break;
1681         default:
1682                 device_printf(adapter->dev, "Only auto media type\n");
1683                 return (EINVAL);
1684         }
1685
1686         return (0);
1687 }
1688
1689 /*********************************************************************
1690  *
1691  *  This routine maps the mbufs to tx descriptors, allowing the
1692  *  TX engine to transmit the packets. 
1693  *      - return 0 on success, positive on failure
1694  *
1695  **********************************************************************/
1696
1697 static int
1698 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1699 {
1700         struct adapter  *adapter = txr->adapter;
1701         u32             olinfo_status = 0, cmd_type_len;
1702         u32             paylen = 0;
1703         int             i, j, error, nsegs, maxsegs;
1704         int             first, last = 0;
1705         struct mbuf     *m_head;
1706         bus_dma_segment_t segs[adapter->num_segs];
1707         bus_dmamap_t    map;
1708         struct ixgbe_tx_buf *txbuf;
1709         union ixgbe_adv_tx_desc *txd = NULL;
1710
1711         m_head = *m_headp;
1712
1713         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1714                 error = ixgbe_tso_pullup(txr, m_headp);
1715                 if (error)
1716                         return error;
1717                 m_head = *m_headp;
1718         }
1719
1720         /* Basic descriptor defines */
1721         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1722             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1723
1724         if (m_head->m_flags & M_VLANTAG)
1725                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1726
1727         /*
1728          * Important to capture the first descriptor
1729          * used because it will contain the index of
1730          * the one we tell the hardware to report back
1731          */
1732         first = txr->next_avail_desc;
1733         txbuf = &txr->tx_buffers[first];
1734         map = txbuf->map;
1735
1736         /*
1737          * Map the packet for DMA.
1738          */
1739         maxsegs = txr->tx_avail - IXGBE_TX_RESERVED;
1740         if (maxsegs > adapter->num_segs)
1741                 maxsegs = adapter->num_segs;
1742
1743         error = bus_dmamap_load_mbuf_defrag(txr->txtag, map, m_headp,
1744             segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1745         if (error) {
1746                 if (error == ENOBUFS)
1747                         adapter->mbuf_defrag_failed++;
1748                 else
1749                         adapter->no_tx_dma_setup++;
1750
1751                 m_freem(*m_headp);
1752                 *m_headp = NULL;
1753                 return (error);
1754         }
1755
1756         /* Make certain there are enough descriptors */
1757         if (nsegs > txr->tx_avail - 2) {
1758                 txr->no_desc_avail++;
1759                 error = ENOBUFS;
1760                 goto xmit_fail;
1761         }
1762         m_head = *m_headp;
1763
1764         /*
1765         ** Set up the appropriate offload context
1766         ** this becomes the first descriptor of 
1767         ** a packet.
1768         */
1769         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1770                 if (ixgbe_tso_setup(txr, m_head, &paylen, &olinfo_status)) {
1771                         cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1772                         olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1773                         olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1774                         ++adapter->tso_tx;
1775                 } else
1776                         return (ENXIO);
1777         } else if (ixgbe_tx_ctx_setup(txr, m_head))
1778                 olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1779
1780 #ifdef IXGBE_IEEE1588
1781         /* This is changing soon to an mtag detection */
1782         if (we detect this mbuf has a TSTAMP mtag)
1783                 cmd_type_len |= IXGBE_ADVTXD_MAC_TSTAMP;
1784 #endif
1785
1786 #ifdef IXGBE_FDIR
1787         /* Do the flow director magic */
1788         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1789                 ++txr->atr_count;
1790                 if (txr->atr_count >= atr_sample_rate) {
1791                         ixgbe_atr(txr, m_head);
1792                         txr->atr_count = 0;
1793                 }
1794         }
1795 #endif
1796         /* Record payload length */
1797         if (paylen == 0)
1798                 olinfo_status |= m_head->m_pkthdr.len <<
1799                     IXGBE_ADVTXD_PAYLEN_SHIFT;
1800
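        /*
        ** Fill one advanced data descriptor per DMA segment; the
        ** last descriptor gets EOP/RS set after the loop.
        */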
1801         i = txr->next_avail_desc;
1802         for (j = 0; j < nsegs; j++) {
1803                 bus_size_t seglen;
1804                 bus_addr_t segaddr;
1805
1806                 txbuf = &txr->tx_buffers[i];
1807                 txd = &txr->tx_base[i];
1808                 seglen = segs[j].ds_len;
1809                 segaddr = htole64(segs[j].ds_addr);
1810
1811                 txd->read.buffer_addr = segaddr;
1812                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1813             cmd_type_len | seglen);
1814                 txd->read.olinfo_status = htole32(olinfo_status);
1815                 last = i; /* descriptor that will get completion IRQ */
1816
1817                 if (++i == adapter->num_tx_desc)
1818                         i = 0;
1819
1820                 txbuf->m_head = NULL;
1821                 txbuf->eop_index = -1;
1822         }
1823
1824         txd->read.cmd_type_len |=
1825             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1826         txr->tx_avail -= nsegs;
1827         txr->next_avail_desc = i;
1828
1829         txbuf->m_head = m_head;
1830         /* Swap the dma map between the first and last descriptor */
1831         txr->tx_buffers[first].map = txbuf->map;
1832         txbuf->map = map;
1833         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1834
1835         /* Set the index of the descriptor that will be marked done */
1836         txbuf = &txr->tx_buffers[first];
1837         txbuf->eop_index = last;
1838
1839         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1840             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1841         /*
1842          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1843          * hardware that this frame is available to transmit.
1844          */
1845         ++txr->total_packets;
1846         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1847
1848         return (0);
1849
1850 xmit_fail:
1851         bus_dmamap_unload(txr->txtag, txbuf->map);
1852         return (error);
1853
1854 }
1855
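/*********************************************************************
 *
 *  Set the unicast/multicast promiscuous bits in FCTRL to match the
 *  current ifnet IFF_PROMISC/IFF_ALLMULTI flags.
 *
 **********************************************************************/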
1856 static void
1857 ixgbe_set_promisc(struct adapter *adapter)
1858 {
1859         u_int32_t       reg_rctl;
1860         struct ifnet   *ifp = adapter->ifp;
1861
1862         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1863         reg_rctl &= (~IXGBE_FCTRL_UPE);
1864         reg_rctl &= (~IXGBE_FCTRL_MPE);
1865         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1866
1867         if (ifp->if_flags & IFF_PROMISC) {
1868                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1869                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1870         } else if (ifp->if_flags & IFF_ALLMULTI) {
1871                 reg_rctl |= IXGBE_FCTRL_MPE;
1872                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1873                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1874         }
1875         return;
1876 }
1877
1878
1879 /*********************************************************************
1880  *  Multicast Update
1881  *
1882  *  This routine is called whenever the multicast address list is updated.
1883  *
1884  **********************************************************************/
1885 #define IXGBE_RAR_ENTRIES 16
1886
1887 static void
1888 ixgbe_set_multi(struct adapter *adapter)
1889 {
1890         u32     fctrl;
1891         u8      *mta;
1892         u8      *update_ptr;
1893         struct  ifmultiaddr *ifma;
1894         int     mcnt = 0;
1895         struct ifnet   *ifp = adapter->ifp;
1896
1897         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1898
1899         mta = adapter->mta;
1900         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1901             MAX_NUM_MULTICAST_ADDRESSES);
1902
1903         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1904         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1905         if (ifp->if_flags & IFF_PROMISC)
1906                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1907         else if (ifp->if_flags & IFF_ALLMULTI) {
1908                 fctrl |= IXGBE_FCTRL_MPE;
1909                 fctrl &= ~IXGBE_FCTRL_UPE;
1910         } else
1911                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1912         
1913         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1914
1915         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1916                 if (ifma->ifma_addr->sa_family != AF_LINK)
1917                         continue;
1918                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1919                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1920                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1921                 mcnt++;
1922         }
1923
1924         update_ptr = mta;
1925         ixgbe_update_mc_addr_list(&adapter->hw,
1926             update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1927
1928         return;
1929 }
1930
1931 /*
1932  * This is an iterator function needed by the multicast
1933  * shared code. It simply feeds the shared code routine the
1934  * addresses from the array built by ixgbe_set_multi() one by one.
1935  */
1936 static u8 *
1937 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
1938 {
1939         u8 *addr = *update_ptr;
1940         u8 *newptr;
1941         *vmdq = 0;
1942
1943         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1944         *update_ptr = newptr;
1945         return addr;
1946 }
1947
1948
1949 /*********************************************************************
1950  *  Timer routine
1951  *
1952  *  This routine checks for link status, updates statistics,
1953  *  and runs the watchdog check.
1954  *
1955  **********************************************************************/
1956
1957 static void
1958 ixgbe_local_timer(void *arg)
1959 {
1960         struct adapter  *adapter = arg;
1961         device_t        dev = adapter->dev;
1962         struct ifnet    *ifp = adapter->ifp;
1963         struct ix_queue *que = adapter->queues;
1964         struct tx_ring  *txr = adapter->tx_rings;
1965         int             hung, busy, paused;
1966
1967         IXGBE_CORE_LOCK(adapter);
1968         hung = busy = paused = 0;
1969
1970         /* Check for pluggable optics */
1971         if (adapter->sfp_probe)
1972                 if (!ixgbe_sfp_probe(adapter))
1973                         goto out; /* Nothing to do */
1974
1975         ixgbe_update_link_status(adapter);
1976         ixgbe_update_stats_counters(adapter);
1977
1978         /*
1979          * If the interface has been paused
1980          * then don't do the watchdog check
1981          */
1982         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
1983                 paused = 1;
1984
1985         /*
1986         ** Check the TX queues status
1987         **      - central locked handling of OACTIVE
1988         **      - watchdog only if all queues show hung
1989         */          
1990         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
1991                 if ((txr->queue_status & IXGBE_QUEUE_HUNG) &&
1992                     (paused == 0))
1993                         ++hung;
1994                 if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
1995                         ++busy;
1996                 if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
1997                         taskqueue_enqueue(que->tq, &que->que_task);
1998         }
1999         /* Only truly watchdog if all queues show hung */
2000         if (hung == adapter->num_queues)
2001                 goto watchdog;
2002         /* Only turn off the stack flow when ALL are depleted */
2003         if (busy == adapter->num_queues)
2004                 ifp->if_flags |= IFF_OACTIVE;
2005         else if ((ifp->if_flags & IFF_OACTIVE) &&
2006             (busy < adapter->num_queues))
2007                 ifp->if_flags &= ~IFF_OACTIVE;
2008
2009 out:
2010         ixgbe_rearm_queues(adapter, adapter->que_mask);
2011         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2012         IXGBE_CORE_UNLOCK(adapter);
2013         return;
2014
2015 watchdog:
2016         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2017         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2018             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2019             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
2020         device_printf(dev,"TX(%d) desc avail = %d, "
2021             "Next TX to Clean = %d\n",
2022             txr->me, txr->tx_avail, txr->next_to_clean);
2023         adapter->ifp->if_flags &= ~IFF_RUNNING;
2024         adapter->watchdog_events++;
2025         ixgbe_init_locked(adapter);
2026
2027         IXGBE_CORE_UNLOCK(adapter);
2028 }
2029
2030 /*
2031 ** Note: this routine updates the OS on the link state
2032 **      the real check of the hardware only happens with
2033 **      a link interrupt.
2034 */
2035 static void
2036 ixgbe_update_link_status(struct adapter *adapter)
2037 {
2038         struct ifnet    *ifp = adapter->ifp;
2039         struct tx_ring *txr = adapter->tx_rings;
2040         device_t dev = adapter->dev;
2041
2042
2043         if (adapter->link_up){ 
2044                 if (adapter->link_active == FALSE) {
2045                         if (bootverbose)
2046                                 device_printf(dev,"Link is up %d Gbps %s \n",
2047                                     ((adapter->link_speed == 128)? 10:1),
2048                                     "Full Duplex");
2049                         adapter->link_active = TRUE;
2050                         /* Update any Flow Control changes */
2051                         ixgbe_fc_enable(&adapter->hw);
2052                         ifp->if_link_state = LINK_STATE_UP;
2053                         if_link_state_change(ifp);
2054                 }
2055         } else { /* Link down */
2056                 if (adapter->link_active == TRUE) {
2057                         if (bootverbose)
2058                                 device_printf(dev,"Link is Down\n");
2059                         ifp->if_link_state = LINK_STATE_DOWN;
2060                         if_link_state_change(ifp);
2061                         adapter->link_active = FALSE;
2062                         for (int i = 0; i < adapter->num_queues;
2063                             i++, txr++)
2064                                 txr->queue_status = IXGBE_QUEUE_IDLE;
2065                 }
2066         }
2067
2068         return;
2069 }
2070
2071
2072 /*********************************************************************
2073  *
2074  *  This routine disables all traffic on the adapter by issuing a
2075  *  global reset on the MAC and deallocates TX/RX buffers.
2076  *
2077  **********************************************************************/
2078
2079 static void
2080 ixgbe_stop(void *arg)
2081 {
2082         struct ifnet   *ifp;
2083         struct adapter *adapter = arg;
2084         struct ixgbe_hw *hw = &adapter->hw;
2085         ifp = adapter->ifp;
2086
2087         KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
2088
2089         INIT_DEBUGOUT("ixgbe_stop: begin\n");
2090         ixgbe_disable_intr(adapter);
2091         callout_stop(&adapter->timer);
2092
2093         /* Let the stack know...*/
2094         ifp->if_flags &= ~IFF_RUNNING;
2095         ifp->if_flags |= IFF_OACTIVE;
2096
2097         ixgbe_reset_hw(hw);
2098         hw->adapter_stopped = FALSE;
2099         ixgbe_stop_adapter(hw);
2100         /* Turn off the laser */
2101         if (hw->phy.multispeed_fiber)
2102                 ixgbe_disable_tx_laser(hw);
2103
2104         /* reprogram the RAR[0] in case user changed it. */
2105         ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2106
2107         return;
2108 }
2109
2110
2111 /*********************************************************************
2112  *
2113  *  Determine hardware revision.
2114  *
2115  **********************************************************************/
2116 static void
2117 ixgbe_identify_hardware(struct adapter *adapter)
2118 {
2119         device_t        dev = adapter->dev;
2120         struct ixgbe_hw *hw = &adapter->hw;
2121
2122         /* Save off the information about this board */
2123         hw->vendor_id = pci_get_vendor(dev);
2124         hw->device_id = pci_get_device(dev);
2125         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2126         hw->subsystem_vendor_id =
2127             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2128         hw->subsystem_device_id =
2129             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2130
2131         /* We need this here to set the num_segs below */
2132         ixgbe_set_mac_type(hw);
2133
2134         /* Pick up the 82599 and VF settings */
2135         if (hw->mac.type != ixgbe_mac_82598EB) {
2136                 hw->phy.smart_speed = ixgbe_smart_speed;
2137                 adapter->num_segs = IXGBE_82599_SCATTER;
2138         } else
2139                 adapter->num_segs = IXGBE_82598_SCATTER;
2140
2141         return;
2142 }
2143
2144 /*********************************************************************
2145  *
2146  *  Determine optic type
2147  *
2148  **********************************************************************/
2149 static void
2150 ixgbe_setup_optics(struct adapter *adapter)
2151 {
2152         struct ixgbe_hw *hw = &adapter->hw;
2153         int             layer;
2154         
2155         layer = ixgbe_get_supported_physical_layer(hw);
2156
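        /*
        ** Check the supported physical layers in order and use the
        ** first match for the ifmedia type; fall back to auto.
        */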
2157         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2158                 adapter->optics = IFM_10G_T;
2159                 return;
2160         }
2161
2162         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2163                 adapter->optics = IFM_1000_T;
2164                 return;
2165         }
2166
2167         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2168             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2169                 adapter->optics = IFM_10G_LR;
2170                 return;
2171         }
2172
2173         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2174                 adapter->optics = IFM_10G_SR;
2175                 return;
2176         }
2177
2178         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2179                 adapter->optics = IFM_10G_TWINAX;
2180                 return;
2181         }
2182
2183         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2184             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2185                 adapter->optics = IFM_10G_CX4;
2186                 return;
2187         }
2188
2189         /* If we get here just set the default */
2190         adapter->optics = IFM_ETHER | IFM_AUTO;
2191         return;
2192 }
2193
2194 /*********************************************************************
2195  *
2196  *  Setup the Legacy or MSI Interrupt handler
2197  *
2198  **********************************************************************/
2199 static int
2200 ixgbe_allocate_legacy(struct adapter *adapter)
2201 {
2202         device_t dev = adapter->dev;
2203         struct          ix_queue *que = adapter->queues;
2204         int error, rid = 0;
2205         unsigned int intr_flags;
2206
2207         /* MSI RID at 1 */
2208         if (adapter->msix == 1)
2209                 rid = 1;
2210
2211         /* Try allocating a MSI interrupt first */
2212         adapter->intr_type = pci_alloc_1intr(dev, ixgbe_msi_enable,
2213                 &rid, &intr_flags);
2214
2215         /* We allocate a single interrupt resource */
2216         adapter->res = bus_alloc_resource_any(dev,
2217             SYS_RES_IRQ, &rid, intr_flags);
2218         if (adapter->res == NULL) {
2219                 device_printf(dev, "Unable to allocate bus resource: "
2220                     "interrupt\n");
2221                 return (ENXIO);
2222         }
2223
2224         /*
2225          * Try allocating a fast interrupt and the associated deferred
2226          * processing contexts.
2227          */
2228         TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2229         que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2230             taskqueue_thread_enqueue, &que->tq);
2231         taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s ixq",
2232             device_get_nameunit(adapter->dev));
2233
2234         /* Tasklets for Link, SFP and Multispeed Fiber */
2235         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2236         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2237         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2238 #ifdef IXGBE_FDIR
2239         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2240 #endif
2241         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2242             taskqueue_thread_enqueue, &adapter->tq);
2243         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2244             device_get_nameunit(adapter->dev));
2245
2246         if ((error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
2247             ixgbe_legacy_irq, que, &adapter->tag, &adapter->serializer)) != 0) {
2248                 device_printf(dev, "Failed to register fast interrupt "
2249                     "handler: %d\n", error);
2250                 taskqueue_free(que->tq);
2251                 taskqueue_free(adapter->tq);
2252                 que->tq = NULL;
2253                 adapter->tq = NULL;
2254                 return (error);
2255         }
2256         /* For simplicity in the handlers */
2257         adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2258
2259         return (0);
2260 }
2261
2262
2263 /*********************************************************************
2264  *
2265  *  Setup MSIX Interrupt resources and handlers 
2266  *
2267  **********************************************************************/
2268 static int
2269 ixgbe_allocate_msix(struct adapter *adapter)
2270 {
2271         device_t        dev = adapter->dev;
2272         struct          ix_queue *que = adapter->queues;
2273         int             error, rid, vector = 0;
2274         char            desc[16];
2275
2276         error = pci_setup_msix(dev);
2277         if (error) {
2278                 device_printf(dev, "MSI-X setup failed\n");
2279                 return (error);
2280         }
2281
2282         for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2283                 rid = vector + 1;
2284
2285                 /*
2286                 ** Bind the msix vector, and thus the
2287                 ** ring to the corresponding cpu.
2288                 */
2289                 error = pci_alloc_msix_vector(dev, vector, &rid, i);
2290                 if (error) {
2291                         device_printf(dev, "pci_alloc_msix_vector failed\n");
2292                         return (error);
2293                 }
2294
2295                 que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2296                     RF_SHAREABLE | RF_ACTIVE);
2297                 if (que->res == NULL) {
2298                         device_printf(dev,"Unable to allocate"
2299                             " bus resource: que interrupt [%d]\n", vector);
2300                         return (ENXIO);
2301                 }
2302                 /* Set the handler function */
2303                 ksnprintf(desc, sizeof(desc), "%s que %d",
2304                     device_get_nameunit(dev), i);
2305                 error = bus_setup_intr_descr(dev, que->res, INTR_MPSAFE,
2306                     ixgbe_msix_que, que, &que->tag, &que->serializer, desc);
2307                 if (error) {
2308                         que->res = NULL;
2309                         device_printf(dev, "Failed to register QUE handler");
2310                         return (error);
2311                 }
2312                 que->msix = vector;
2313                 adapter->que_mask |= (u64)(1 << que->msix);
2314
2315                 TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2316                 que->tq = taskqueue_create("ixgbe_que", M_NOWAIT,
2317                     taskqueue_thread_enqueue, &que->tq);
2318                 taskqueue_start_threads(&que->tq, 1, PI_NET, -1, "%s que",
2319                     device_get_nameunit(adapter->dev));
2320         }
2321
2322         /* and Link, bind vector to cpu #0 */
2323         rid = vector + 1;
2324         error = pci_alloc_msix_vector(dev, vector, &rid, 0);
2325         if (error) {
2326                 device_printf(dev, "pci_alloc_msix_vector failed\n");
2327                 return (error);
2328         }
2329         adapter->res = bus_alloc_resource_any(dev,
2330             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2331         if (!adapter->res) {
2332                 device_printf(dev,"Unable to allocate"
2333                     " bus resource: Link interrupt [%d]\n", rid);
2334                 return (ENXIO);
2335         }
2336         /* Set the link handler function */
2337         error = bus_setup_intr_descr(dev, adapter->res, INTR_MPSAFE,
2338             ixgbe_msix_link, adapter, &adapter->tag, &adapter->serializer,
2339             "link");
2340         if (error) {
2341                 adapter->res = NULL;
2342                 device_printf(dev, "Failed to register LINK handler");
2343                 return (error);
2344         }
2345         pci_enable_msix(dev);
2346
2347         adapter->linkvec = vector;
2348         /* Tasklets for Link, SFP and Multispeed Fiber */
2349         TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2350         TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2351         TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2352 #ifdef IXGBE_FDIR
2353         TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2354 #endif
2355         adapter->tq = taskqueue_create("ixgbe_link", M_NOWAIT,
2356             taskqueue_thread_enqueue, &adapter->tq);
2357         taskqueue_start_threads(&adapter->tq, 1, PI_NET, -1, "%s linkq",
2358             device_get_nameunit(adapter->dev));
2359
2360         return (0);
2361 }
2362
2363 /*
2364  * Setup Either MSI/X or MSI
2365  */
2366 static int
2367 ixgbe_setup_msix(struct adapter *adapter)
2368 {
2369         device_t dev = adapter->dev;
2370         int rid, want, queues, msgs;
2371
2372         /* Override by tuneable */
2373         if (ixgbe_enable_msix == 0)
2374                 goto msi;
2375
2376         /* First try MSI/X */
2377         rid = PCIR_BAR(MSIX_82598_BAR);
2378         adapter->msix_mem = bus_alloc_resource_any(dev,
2379             SYS_RES_MEMORY, &rid, RF_ACTIVE);
2380         if (!adapter->msix_mem) {
2381                 rid += 4;       /* 82599 maps in higher BAR */
2382                 adapter->msix_mem = bus_alloc_resource_any(dev,
2383                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
2384         }
2385         if (!adapter->msix_mem) {
2386                 /* May not be enabled */
2387                 device_printf(adapter->dev,
2388             "Unable to map MSIX table\n");
2389                 goto msi;
2390         }
2391
2392         msgs = pci_msix_count(dev); 
2393         if (msgs == 0) { /* system has msix disabled */
2394                 bus_release_resource(dev, SYS_RES_MEMORY,
2395                     rid, adapter->msix_mem);
2396                 adapter->msix_mem = NULL;
2397                 goto msi;
2398         }
2399
2400         /* Figure out a reasonable auto config value */
2401         queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;
2402
2403         if (ixgbe_num_queues != 0)
2404                 queues = ixgbe_num_queues;
2405         /* Set max queues to 8 when autoconfiguring */
2406         else if ((ixgbe_num_queues == 0) && (queues > 8))
2407                 queues = 8;
2408
2409         /*
2410         ** Want one vector (RX/TX pair) per queue
2411         ** plus an additional for Link.
2412         */
2413         want = queues + 1;
2414         if (msgs >= want)
2415                 msgs = want;
2416         else {
2417                 device_printf(adapter->dev,
2418                     "MSIX Configuration Problem, "
2419                     "%d vectors but %d queues wanted!\n",
2420                     msgs, want);
2421                 return (0); /* Will go to Legacy setup */
2422         }
2423         if (msgs) {
2424                 device_printf(adapter->dev,
2425                     "Using MSIX interrupts with %d vectors\n", msgs);
2426                 adapter->num_queues = queues;
2427                 return (msgs);
2428         }
2429 msi:
2430         msgs = pci_msi_count(dev);
2431         return (msgs);
2432 }
2433
2434
2435 static int
2436 ixgbe_allocate_pci_resources(struct adapter *adapter)
2437 {
2438         int             rid;
2439         device_t        dev = adapter->dev;
2440
2441         rid = PCIR_BAR(0);
2442         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2443             &rid, RF_ACTIVE);
2444
2445         if (!(adapter->pci_mem)) {
2446                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2447                 return (ENXIO);
2448         }
2449
2450         adapter->osdep.mem_bus_space_tag =
2451                 rman_get_bustag(adapter->pci_mem);
2452         adapter->osdep.mem_bus_space_handle =
2453                 rman_get_bushandle(adapter->pci_mem);
2454         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2455
2456         /* Legacy defaults */
2457         adapter->num_queues = 1;
2458         adapter->hw.back = &adapter->osdep;
2459
2460         /*
2461         ** Now setup MSI or MSI/X, should
2462         ** return us the number of supported
2463         ** vectors. (Will be 1 for MSI)
2464         */
2465         adapter->msix = ixgbe_setup_msix(adapter);
2466         return (0);
2467 }
2468
2469 static void
2470 ixgbe_free_pci_resources(struct adapter * adapter)
2471 {
2472         struct          ix_queue *que = adapter->queues;
2473         device_t        dev = adapter->dev;
2474         int             rid, memrid;
2475
2476         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2477                 memrid = PCIR_BAR(MSIX_82598_BAR);
2478         else
2479                 memrid = PCIR_BAR(MSIX_82599_BAR);
2480
2481         /*
2482         ** There is a slight possibility of a failure mode
2483         ** in attach that will result in entering this function
2484         ** before interrupt resources have been initialized, and
2485         ** in that case we do not want to execute the loops below.
2486         ** We can detect this reliably by the state of the adapter
2487         ** res pointer.
2488         */
2489         if (adapter->res == NULL)
2490                 goto mem;
2491
2492         /*
2493         **  Release all msix queue resources:
2494         */
2495         for (int i = 0; i < adapter->num_queues; i++, que++) {
2496                 rid = que->msix + 1;
2497                 if (que->tag != NULL) {
2498                         bus_teardown_intr(dev, que->res, que->tag);
2499                         que->tag = NULL;
2500                 }
2501                 if (que->res != NULL)
2502                         bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2503         }
2504
2505
2506         /* Clean the Legacy or Link interrupt last */
2507         if (adapter->linkvec) /* we are doing MSIX */
2508                 rid = adapter->linkvec + 1;
2509         else
2510                 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2511
2512         if (adapter->tag != NULL) {
2513                 bus_teardown_intr(dev, adapter->res, adapter->tag);
2514                 adapter->tag = NULL;
2515         }
2516         if (adapter->res != NULL)
2517                 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2518         if (adapter->intr_type == PCI_INTR_TYPE_MSI)
2519                 pci_release_msi(adapter->dev);
2520
2521 mem:
2522         if (adapter->msix)
2523                 pci_release_msi(dev);
2524
2525         if (adapter->msix_mem != NULL)
2526                 bus_release_resource(dev, SYS_RES_MEMORY,
2527                     memrid, adapter->msix_mem);
2528
2529         if (adapter->pci_mem != NULL)
2530                 bus_release_resource(dev, SYS_RES_MEMORY,
2531                     PCIR_BAR(0), adapter->pci_mem);
2532
2533         return;
2534 }
2535
2536 /*********************************************************************
2537  *
2538  *  Setup networking device structure and register an interface.
2539  *
2540  **********************************************************************/
2541 static int
2542 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2543 {
2544         struct ixgbe_hw *hw = &adapter->hw;
2545         struct ifnet   *ifp;
2546
2547         INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2548
2549         ifp = adapter->ifp = if_alloc(IFT_ETHER);
2550         if (ifp == NULL) {
2551                 device_printf(dev, "can not allocate ifnet structure\n");
2552                 return (-1);
2553         }
2554         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2555         ifp->if_baudrate = 1000000000;
2556         ifp->if_init = ixgbe_init;
2557         ifp->if_softc = adapter;
2558         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2559         ifp->if_ioctl = ixgbe_ioctl;
2560         ifp->if_start = ixgbe_start;
2561 #if 0 /* __FreeBSD_version >= 800000 */
2562         ifp->if_transmit = ixgbe_mq_start;
2563         ifp->if_qflush = ixgbe_qflush;
2564 #endif
2565         ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2;
2566
2567         ether_ifattach(ifp, adapter->hw.mac.addr, NULL);
2568
2569         adapter->max_frame_size =
2570             ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2571
2572         /*
2573          * Tell the upper layer(s) we support long frames.
2574          */
2575         ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2576
2577         ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2578         ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2579         ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2580 #if 0 /* NET_TSO */
2581                              |  IFCAP_VLAN_HWTSO
2582 #endif
2583                              |  IFCAP_VLAN_MTU;
2584         ifp->if_capenable = ifp->if_capabilities;
2585
2586         /* Don't enable LRO by default */
2587 #if 0 /* NET_LRO */
2588         ifp->if_capabilities |= IFCAP_LRO;
2589 #endif
2590
2591         /*
2592         ** Don't turn this on by default: if vlans are
2593         ** created on another pseudo device (e.g. lagg)
2594         ** then vlan events are not passed thru, breaking
2595         ** operation, but with HW FILTER off it works. If
2596         ** using vlans directly on the ixgbe driver you can
2597         ** enable this and get full hardware tag filtering.
2598         */
2599         ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2600
2601         /*
2602          * Specify the media types supported by this adapter and register
2603          * callbacks to update media and link information
2604          */
2605         ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2606                      ixgbe_media_status);
2607         ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2608         ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2609         if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2610                 ifmedia_add(&adapter->media,
2611                     IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2612                 ifmedia_add(&adapter->media,
2613                     IFM_ETHER | IFM_1000_T, 0, NULL);
2614         }
2615         ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2616         ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2617
2618         return (0);
2619 }
2620
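/*
** Bring the link up: SFP and multispeed-fiber parts are handed off
** to the mod/msf tasks, otherwise the link is negotiated and set up
** directly through the shared code.
*/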
2621 static void
2622 ixgbe_config_link(struct adapter *adapter)
2623 {
2624         struct ixgbe_hw *hw = &adapter->hw;
2625         u32     autoneg, err = 0;
2626         bool    sfp, negotiate;
2627
2628         sfp = ixgbe_is_sfp(hw);
2629
2630         if (sfp) { 
2631                 if (hw->phy.multispeed_fiber) {
2632                         hw->mac.ops.setup_sfp(hw);
2633                         ixgbe_enable_tx_laser(hw);
2634                         taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2635                 } else
2636                         taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2637         } else {
2638                 if (hw->mac.ops.check_link)
2639                         err = ixgbe_check_link(hw, &autoneg,
2640                             &adapter->link_up, FALSE);
2641                 if (err)
2642                         goto out;
2643                 autoneg = hw->phy.autoneg_advertised;
2644                 if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2645                         err  = hw->mac.ops.get_link_capabilities(hw,
2646                             &autoneg, &negotiate);
2647                 if (err)
2648                         goto out;
2649                 if (hw->mac.ops.setup_link)
2650                         err = hw->mac.ops.setup_link(hw, autoneg,
2651                             negotiate, adapter->link_up);
2652         }
2653 out:
2654         return;
2655 }
2656
2657 /********************************************************************
2658  * Manage DMA'able memory.
2659  *******************************************************************/
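/* Callback for bus_dmamap_load(): record the single segment's bus address */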
2660 static void
2661 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2662 {
2663         if (error)
2664                 return;
2665         *(bus_addr_t *) arg = segs->ds_addr;
2666         return;
2667 }
2668
2669 static int
2670 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2671                 struct ixgbe_dma_alloc *dma, int mapflags)
2672 {
2673         device_t dev = adapter->dev;
2674         int             r;
2675
2676         r = bus_dma_tag_create(NULL,    /* parent */
2677                                DBA_ALIGN, 0,    /* alignment, bounds */
2678                                BUS_SPACE_MAXADDR,       /* lowaddr */
2679                                BUS_SPACE_MAXADDR,       /* highaddr */
2680                                NULL, NULL,      /* filter, filterarg */
2681                                size,    /* maxsize */
2682                                1,       /* nsegments */
2683                                size,    /* maxsegsize */
2684                                BUS_DMA_ALLOCNOW,        /* flags */
2685                                &dma->dma_tag);
2686         if (r != 0) {
2687                 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2688                        "error %u\n", r);
2689                 goto fail_0;
2690         }
2691         r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2692                              BUS_DMA_NOWAIT, &dma->dma_map);
2693         if (r != 0) {
2694                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2695                        "error %u\n", r);
2696                 goto fail_1;
2697         }
2698         r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2699                             size,
2700                             ixgbe_dmamap_cb,
2701                             &dma->dma_paddr,
2702                             mapflags | BUS_DMA_NOWAIT);
2703         if (r != 0) {
2704                 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2705                        "error %u\n", r);
2706                 goto fail_2;
2707         }
2708         dma->dma_size = size;
2709         return (0);
2710 fail_2:
2711         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2712 fail_1:
2713         bus_dma_tag_destroy(dma->dma_tag);
2714 fail_0:
2715         dma->dma_map = NULL;
2716         dma->dma_tag = NULL;
2717         return (r);
2718 }
2719
2720 static void
2721 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2722 {
2723         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2724             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2725         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2726         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2727         bus_dma_tag_destroy(dma->dma_tag);
2728 }
2729
2730
2731 /*********************************************************************
2732  *
2733  *  Allocate memory for the transmit and receive rings, and then
2734  *  the descriptors associated with each, called only once at attach.
2735  *
2736  **********************************************************************/
2737 static int
2738 ixgbe_allocate_queues(struct adapter *adapter)
2739 {
2740         device_t        dev = adapter->dev;
2741         struct ix_queue *que;
2742         struct tx_ring  *txr;
2743         struct rx_ring  *rxr;
2744         int rsize, tsize, error = IXGBE_SUCCESS;
2745         int txconf = 0, rxconf = 0;
2746
2747         /* First allocate the top level queue structs */
2748         if (!(adapter->queues =
2749             (struct ix_queue *) kmalloc(sizeof(struct ix_queue) *
2750             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2751                 device_printf(dev, "Unable to allocate queue memory\n");
2752                 error = ENOMEM;
2753                 goto fail;
2754         }
2755
2756         /* First allocate the TX ring struct memory */
2757         if (!(adapter->tx_rings =
2758             (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
2759             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2760                 device_printf(dev, "Unable to allocate TX ring memory\n");
2761                 error = ENOMEM;
2762                 goto tx_fail;
2763         }
2764
2765         /* Next allocate the RX */
2766         if (!(adapter->rx_rings =
2767             (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
2768             adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2769                 device_printf(dev, "Unable to allocate RX ring memory\n");
2770                 error = ENOMEM;
2771                 goto rx_fail;
2772         }
2773
2774         /* For the ring itself */
2775         tsize = roundup2(adapter->num_tx_desc *
2776             sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2777
2778         /*
2779          * Now set up the TX queues, txconf is needed to handle the
2780          * possibility that things fail midcourse and we need to
2781          * undo memory gracefully
2782          */ 
2783         for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2784                 /* Set up some basics */
2785                 txr = &adapter->tx_rings[i];
2786                 txr->adapter = adapter;
2787                 txr->me = i;
2788
2789                 /* Initialize the TX side lock */
2790                 ksnprintf(txr->lock_name, sizeof(txr->lock_name), "%s:tx(%d)",
2791                     device_get_nameunit(dev), txr->me);
2792                 lockinit(&txr->tx_lock, txr->lock_name, 0, LK_CANRECURSE);
2793
2794                 if (ixgbe_dma_malloc(adapter, tsize,
2795                         &txr->txdma, BUS_DMA_NOWAIT)) {
2796                         device_printf(dev,
2797                             "Unable to allocate TX Descriptor memory\n");
2798                         error = ENOMEM;
2799                         goto err_tx_desc;
2800                 }
2801                 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2802                 bzero((void *)txr->tx_base, tsize);
2803
2804                 /* Now allocate transmit buffers for the ring */
2805                 if (ixgbe_allocate_transmit_buffers(txr)) {
2806                         device_printf(dev,
2807                             "Critical Failure setting up transmit buffers\n");
2808                         error = ENOMEM;
2809                         goto err_tx_desc;
2810                 }
2811 #if 0 /* __FreeBSD_version >= 800000 */
2812                 /* Allocate a buf ring */
2813                 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2814                     M_WAITOK, &txr->tx_mtx);
2815                 if (txr->br == NULL) {
2816                         device_printf(dev,
2817                             "Critical Failure setting up buf ring\n");
2818                         error = ENOMEM;
2819                         goto err_tx_desc;
2820                 }
2821 #endif
2822         }
2823
2824         /*
2825          * Next the RX queues...
2826          */ 
2827         rsize = roundup2(adapter->num_rx_desc *
2828             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2829         for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2830                 rxr = &adapter->rx_rings[i];
2831                 /* Set up some basics */
2832                 rxr->adapter = adapter;
2833                 rxr->me = i;
2834
2835                 /* Initialize the RX side lock */
2836                 ksnprintf(rxr->lock_name, sizeof(rxr->lock_name), "%s:rx(%d)",
2837                     device_get_nameunit(dev), rxr->me);
2838                 lockinit(&rxr->rx_lock, rxr->lock_name, 0, LK_CANRECURSE);
2839
2840                 if (ixgbe_dma_malloc(adapter, rsize,
2841                         &rxr->rxdma, BUS_DMA_NOWAIT)) {
2842                         device_printf(dev,
2843                             "Unable to allocate RxDescriptor memory\n");
2844                         error = ENOMEM;
2845                         goto err_rx_desc;
2846                 }
2847                 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2848                 bzero((void *)rxr->rx_base, rsize);
2849
2850                 /* Allocate receive buffers for the ring */
2851                 if (ixgbe_allocate_receive_buffers(rxr)) {
2852                         device_printf(dev,
2853                             "Critical Failure setting up receive buffers\n");
2854                         error = ENOMEM;
2855                         goto err_rx_desc;
2856                 }
2857         }
2858
2859         /*
2860         ** Finally set up the queue holding structs
2861         */
2862         for (int i = 0; i < adapter->num_queues; i++) {
2863                 que = &adapter->queues[i];
2864                 que->adapter = adapter;
2865                 que->txr = &adapter->tx_rings[i];
2866                 que->rxr = &adapter->rx_rings[i];
2867         }
2868
2869         return (0);
2870
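/*
 * Error unwind: txconf and rxconf count how many rings got their
 * descriptor DMA area, so only those are torn down below, in the
 * reverse order of allocation.
 */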
2871 err_rx_desc:
2872         for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2873                 ixgbe_dma_free(adapter, &rxr->rxdma);
2874 err_tx_desc:
2875         for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2876                 ixgbe_dma_free(adapter, &txr->txdma);
2877         kfree(adapter->rx_rings, M_DEVBUF);
2878 rx_fail:
2879         kfree(adapter->tx_rings, M_DEVBUF);
2880 tx_fail:
2881         kfree(adapter->queues, M_DEVBUF);
2882 fail:
2883         return (error);
2884 }
2885
2886 /*********************************************************************
2887  *
2888  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2889  *  the information needed to transmit a packet on the wire. This is
2890  *  called only once at attach, setup is done every reset.
2891  *
2892  **********************************************************************/
2893 static int
2894 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2895 {
2896         struct adapter *adapter = txr->adapter;
2897         device_t dev = adapter->dev;
2898         struct ixgbe_tx_buf *txbuf;
2899         int error, i;
2900
2901         /*
2902          * Setup DMA descriptor areas.
2903          */
2904         if ((error = bus_dma_tag_create(
2905                                NULL,    /* parent */
2906                                1, 0,            /* alignment, bounds */
2907                                BUS_SPACE_MAXADDR,       /* lowaddr */
2908                                BUS_SPACE_MAXADDR,       /* highaddr */
2909                                NULL, NULL,              /* filter, filterarg */
2910                                IXGBE_TSO_SIZE,          /* maxsize */
2911                                adapter->num_segs,       /* nsegments */
2912                                PAGE_SIZE,               /* maxsegsize */
2913                                0,                       /* flags */
2914                                &txr->txtag))) {
2915                 device_printf(dev, "Unable to allocate TX DMA tag\n");
2916                 goto fail;
2917         }
2918
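        /* One ixgbe_tx_buf bookkeeping entry per TX descriptor, zeroed. */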
2919         if (!(txr->tx_buffers =
2920             (struct ixgbe_tx_buf *) kmalloc(sizeof(struct ixgbe_tx_buf) *
2921             adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2922                 device_printf(dev, "Unable to allocate tx_buffer memory\n");
2923                 error = ENOMEM;
2924                 goto fail;
2925         }
2926
2927         /* Create the descriptor buffer dma maps */
2928         txbuf = txr->tx_buffers;
2929         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2930                 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2931                 if (error != 0) {
2932                         device_printf(dev, "Unable to create TX DMA map\n");
2933                         goto fail;
2934                 }
2935         }
2936
2937         return 0;
2938 fail:
2939         /* We free everything; this handles the case where setup failed partway through */
2940         ixgbe_free_transmit_structures(adapter);
2941         return (error);
2942 }
2943
2944 /*********************************************************************
2945  *
2946  *  Initialize a transmit ring.
2947  *
2948  **********************************************************************/
2949 static void
2950 ixgbe_setup_transmit_ring(struct tx_ring *txr)
2951 {
2952         struct adapter *adapter = txr->adapter;
2953         struct ixgbe_tx_buf *txbuf;
2954         int i;
2955 #ifdef DEV_NETMAP
2956         struct netmap_adapter *na = NA(adapter->ifp);
2957         struct netmap_slot *slot;
2958 #endif /* DEV_NETMAP */
2959
2960         /* Clear the old ring contents */
2961         IXGBE_TX_LOCK(txr);
2962 #ifdef DEV_NETMAP
2963         /*
2964          * (under lock): if in netmap mode, do some consistency
2965          * checks and set slot to entry 0 of the netmap ring.
2966          */
2967         slot = netmap_reset(na, NR_TX, txr->me, 0);
2968 #endif /* DEV_NETMAP */
2969         bzero((void *)txr->tx_base,
2970               (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
2971         /* Reset indices */
2972         txr->next_avail_desc = 0;
2973         txr->next_to_clean = 0;
2974
2975         /* Free any existing tx buffers. */
2976         txbuf = txr->tx_buffers;
2977         for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2978                 if (txbuf->m_head != NULL) {
2979                         bus_dmamap_sync(txr->txtag, txbuf->map,
2980                             BUS_DMASYNC_POSTWRITE);
2981                         bus_dmamap_unload(txr->txtag, txbuf->map);
2982                         m_freem(txbuf->m_head);
2983                         txbuf->m_head = NULL;
2984                 }
2985 #ifdef DEV_NETMAP
2986                 /*
2987                  * In netmap mode, set the map for the packet buffer.
2988                  * NOTE: Some drivers (not this one) also need to set
2989                  * the physical buffer address in the NIC ring.
2990                  * Slots in the netmap ring (indexed by "si") are
2991                  * kring->nkr_hwofs positions "ahead" wrt the
2992                  * corresponding slot in the NIC ring. In some drivers
2993                  * (not here) nkr_hwofs can be negative. Function
2994                  * netmap_idx_n2k() handles wraparounds properly.
2995                  */
2996                 if (slot) {
2997                         int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
2998                         netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
2999                 }
3000 #endif /* DEV_NETMAP */
3001                 /* Clear the EOP index */
3002                 txbuf->eop_index = -1;
3003         }
3004
3005 #ifdef IXGBE_FDIR
3006         /* Set the rate at which we sample packets */
3007         if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3008                 txr->atr_sample = atr_sample_rate;
3009 #endif
3010
3011         /* Set number of descriptors available */
3012         txr->tx_avail = adapter->num_tx_desc;
3013
3014         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3015             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3016         IXGBE_TX_UNLOCK(txr);
3017 }
3018
3019 /*********************************************************************
3020  *
3021  *  Initialize all transmit rings.
3022  *
3023  **********************************************************************/
3024 static int
3025 ixgbe_setup_transmit_structures(struct adapter *adapter)
3026 {
3027         struct tx_ring *txr = adapter->tx_rings;
3028
3029         for (int i = 0; i < adapter->num_queues; i++, txr++)
3030                 ixgbe_setup_transmit_ring(txr);
3031
3032         return (0);
3033 }
3034
3035 /*********************************************************************
3036  *
3037  *  Enable transmit unit.
3038  *
3039  **********************************************************************/
3040 static void
3041 ixgbe_initialize_transmit_units(struct adapter *adapter)
3042 {
3043         struct tx_ring  *txr = adapter->tx_rings;
3044         struct ixgbe_hw *hw = &adapter->hw;
3045
3046         /* Setup the Base and Length of the Tx Descriptor Ring */
3047
3048         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3049                 u64     tdba = txr->txdma.dma_paddr;
3050                 u32     txctrl;
3051
3052                 IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3053                        (tdba & 0x00000000ffffffffULL));
3054                 IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3055                 IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3056                     adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc));
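                /* (legacy and advanced TX descriptors are both 16 bytes, so this size is correct) */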
3057
3058                 /* Setup the HW Tx Head and Tail descriptor pointers */
3059                 IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3060                 IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3061
3062                 /* Setup Transmit Descriptor Cmd Settings */
3063                 txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3064                 txr->queue_status = IXGBE_QUEUE_IDLE;
3065
3066                 /* Disable Head Writeback */
3067                 switch (hw->mac.type) {
3068                 case ixgbe_mac_82598EB:
3069                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3070                         break;
3071                 case ixgbe_mac_82599EB:
3072                 case ixgbe_mac_X540:
3073                 default:
3074                         txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3075                         break;
3076                 }
3077                 txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3078                 switch (hw->mac.type) {
3079                 case ixgbe_mac_82598EB:
3080                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3081                         break;
3082                 case ixgbe_mac_82599EB:
3083                 case ixgbe_mac_X540:
3084                 default:
3085                         IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3086                         break;
3087                 }
3088
3089         }
3090
3091         if (hw->mac.type != ixgbe_mac_82598EB) {
3092                 u32 dmatxctl, rttdcs;
3093                 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3094                 dmatxctl |= IXGBE_DMATXCTL_TE;
3095                 IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3096                 /* Disable arbiter to set MTQC */
3097                 rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3098                 rttdcs |= IXGBE_RTTDCS_ARBDIS;
3099                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
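                /* 64 TX queues sharing a single packet buffer (the non-DCB, non-VT default) */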
3100                 IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3101                 rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3102                 IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3103         }
3104
3105         return;
3106 }
3107
3108 /*********************************************************************
3109  *
3110  *  Free all transmit rings.
3111  *
3112  **********************************************************************/
3113 static void
3114 ixgbe_free_transmit_structures(struct adapter *adapter)
3115 {
3116         struct tx_ring *txr = adapter->tx_rings;
3117
3118         for (int i = 0; i < adapter->num_queues; i++, txr++) {
3119                 IXGBE_TX_LOCK(txr);
3120                 ixgbe_free_transmit_buffers(txr);
3121                 ixgbe_dma_free(adapter, &txr->txdma);
3122                 IXGBE_TX_UNLOCK(txr);
3123                 IXGBE_TX_LOCK_DESTROY(txr);
3124         }
3125         kfree(adapter->tx_rings, M_DEVBUF);
3126 }
3127
3128 /*********************************************************************
3129  *
3130  *  Free transmit ring related data structures.
3131  *
3132  **********************************************************************/
3133 static void
3134 ixgbe_free_transmit_buffers(struct tx_ring *txr)
3135 {
3136         struct adapter *adapter = txr->adapter;
3137         struct ixgbe_tx_buf *tx_buffer;
3138         int             i;
3139
3140         INIT_DEBUGOUT("free_transmit_buffers: begin");
3141
3142         if (txr->tx_buffers == NULL)
3143                 return;
3144
3145         tx_buffer = txr->tx_buffers;
3146         for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3147                 if (tx_buffer->m_head != NULL) {
3148                         bus_dmamap_sync(txr->txtag, tx_buffer->map,
3149                             BUS_DMASYNC_POSTWRITE);
3150                         bus_dmamap_unload(txr->txtag,
3151                             tx_buffer->map);
3152                         m_freem(tx_buffer->m_head);
3153                         tx_buffer->m_head = NULL;
3154                         if (tx_buffer->map != NULL) {
3155                                 bus_dmamap_destroy(txr->txtag,
3156                                     tx_buffer->map);
3157                                 tx_buffer->map = NULL;
3158                         }
3159                 } else if (tx_buffer->map != NULL) {
3160                         bus_dmamap_unload(txr->txtag,
3161                             tx_buffer->map);
3162                         bus_dmamap_destroy(txr->txtag,
3163                             tx_buffer->map);
3164                         tx_buffer->map = NULL;
3165                 }
3166         }
3167 #if 0 /* __FreeBSD_version >= 800000 */
3168         if (txr->br != NULL)
3169                 buf_ring_free(txr->br, M_DEVBUF);
3170 #endif
3171         if (txr->tx_buffers != NULL) {
3172                 kfree(txr->tx_buffers, M_DEVBUF);
3173                 txr->tx_buffers = NULL;
3174         }
3175         if (txr->txtag != NULL) {
3176                 bus_dma_tag_destroy(txr->txtag);
3177                 txr->txtag = NULL;
3178         }
3179         return;
3180 }
3181
3182 /*********************************************************************
3183  *
3184  *  Advanced Context Descriptor setup for VLAN or CSUM
3185  *
3186  **********************************************************************/
3187
3188 static bool
3189 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3190 {
3191         struct adapter *adapter = txr->adapter;
3192         struct ixgbe_adv_tx_context_desc *TXD;
3193         struct ixgbe_tx_buf        *tx_buffer;
3194         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3195         struct ether_vlan_header *eh;
3196         struct ip *ip;
3197         struct ip6_hdr *ip6;
3198         int  ehdrlen, ip_hlen = 0;
3199         u16     etype;
3200         u8      ipproto = 0;
3201         bool    offload = TRUE;
3202         int ctxd = txr->next_avail_desc;
3203         u16 vtag = 0;
3204
3205
3206         if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3207                 offload = FALSE;
3208
3209         tx_buffer = &txr->tx_buffers[ctxd];
3210         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3211
3212         /*
3213         ** In advanced descriptors the vlan tag must 
3214         ** be placed into the descriptor itself.
3215         */
3216         if (mp->m_flags & M_VLANTAG) {
3217                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3218                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3219         } else if (offload == FALSE)
3220                 return FALSE;
3221
3222         /*
3223          * Determine where frame payload starts.
3224          * Jump over vlan headers if already present,
3225          * helpful for QinQ too.
3226          */
3227         eh = mtod(mp, struct ether_vlan_header *);
3228         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3229                 etype = ntohs(eh->evl_proto);
3230                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3231         } else {
3232                 etype = ntohs(eh->evl_encap_proto);
3233                 ehdrlen = ETHER_HDR_LEN;
3234         }
3235
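        /*
         * vlan_macip_lens packs, per the IXGBE_ADVTXD_*_SHIFT macros:
         * IP header length in bits 8:0, MAC header length in bits 15:9
         * and the VLAN tag in bits 31:16 of the context descriptor.
         */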
3236         /* Set the ether header length */
3237         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3238
3239         switch (etype) {
3240                 case ETHERTYPE_IP:
3241                         ip = (struct ip *)(mp->m_data + ehdrlen);
3242                         ip_hlen = ip->ip_hl << 2;
3243                         ipproto = ip->ip_p;
3244                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3245                         break;
3246                 case ETHERTYPE_IPV6:
3247                         ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3248                         ip_hlen = sizeof(struct ip6_hdr);
3249                         /* XXX-BZ this will go badly in case of ext hdrs. */
3250                         ipproto = ip6->ip6_nxt;
3251                         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3252                         break;
3253                 default:
3254                         offload = FALSE;
3255                         break;
3256         }
3257
3258         vlan_macip_lens |= ip_hlen;
3259         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3260
3261         switch (ipproto) {
3262                 case IPPROTO_TCP:
3263                         if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3264                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3265                         break;
3266
3267                 case IPPROTO_UDP:
3268                         if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3269                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3270                         break;
3271
3272 #if 0
3273                 case IPPROTO_SCTP:
3274                         if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3275                                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3276                         break;
3277 #endif
3278                 default:
3279                         offload = FALSE;
3280                         break;
3281         }
3282
3283         /* Now copy bits into descriptor */
3284         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3285         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3286         TXD->seqnum_seed = htole32(0);
3287         TXD->mss_l4len_idx = htole32(0);
3288
3289         tx_buffer->m_head = NULL;
3290         tx_buffer->eop_index = -1;
3291
3292         /* We've consumed the first desc, adjust counters */
3293         if (++ctxd == adapter->num_tx_desc)
3294                 ctxd = 0;
3295         txr->next_avail_desc = ctxd;
3296         --txr->tx_avail;
3297
3298         return (offload);
3299 }
3300
3301 /**********************************************************************
3302  *
3303  *  Setup work for hardware segmentation offload (TSO) on
3304  *  adapters using advanced tx descriptors
3305  *
3306  **********************************************************************/
3307 static bool
3308 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen,
3309     u32 *olinfo_status)
3310 {
3311         struct adapter *adapter = txr->adapter;
3312         struct ixgbe_adv_tx_context_desc *TXD;
3313         struct ixgbe_tx_buf        *tx_buffer;
3314         u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3315         u16 vtag = 0, eh_type;
3316         u32 mss_l4len_idx = 0, len;
3317         int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3318         struct ether_vlan_header *eh;
3319 #if 0 /* IPv6 TSO */
3320 #ifdef INET6
3321         struct ip6_hdr *ip6;
3322 #endif
3323 #endif
3324 #ifdef INET
3325         struct ip *ip;
3326 #endif
3327         struct tcphdr *th;
3328
3329
3330         /*
3331          * Determine where frame payload starts.
3332          * Jump over vlan headers if already present
3333          */
3334         eh = mtod(mp, struct ether_vlan_header *);
3335         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3336                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3337                 eh_type = eh->evl_proto;
3338         } else {
3339                 ehdrlen = ETHER_HDR_LEN;
3340                 eh_type = eh->evl_encap_proto;
3341         }
3342
3343         /* Ensure we have at least the IP+TCP header in the first mbuf. */
3344         len = ehdrlen + sizeof(struct tcphdr);
3345         switch (ntohs(eh_type)) {
3346 #if 0 /* IPv6 TSO */
3347 #ifdef INET6
3348         case ETHERTYPE_IPV6:
3349                 if (mp->m_len < len + sizeof(struct ip6_hdr))
3350                         return FALSE;
3351                 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3352                 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3353                 if (ip6->ip6_nxt != IPPROTO_TCP)
3354                         return FALSE;
3355                 ip_hlen = sizeof(struct ip6_hdr);
3356                 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3357                 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3358                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3359                 break;
3360 #endif
3361 #endif
3362 #ifdef INET
3363         case ETHERTYPE_IP:
3364                 if (mp->m_len < len + sizeof(struct ip))
3365                         return FALSE;
3366                 ip = (struct ip *)(mp->m_data + ehdrlen);
3367                 if (ip->ip_p != IPPROTO_TCP)
3368                         return FALSE;
3369                 ip->ip_sum = 0;
3370                 ip_hlen = ip->ip_hl << 2;
3371                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
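                /*
                 * Seed th_sum with the pseudo-header checksum (addresses and
                 * protocol, no length); the hardware derives each segment's
                 * full TCP checksum from it.
                 */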
3372                 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3373                     ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3374                 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3375                 /* Tell transmit desc to also do IPv4 checksum. */
3376                 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3377                 break;
3378 #endif
3379         default:
3380                 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3381                     __func__, ntohs(eh_type));
3382                 break;
3383         }
3384
3385         ctxd = txr->next_avail_desc;
3386         tx_buffer = &txr->tx_buffers[ctxd];
3387         TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3388
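        /* th_off counts 32-bit words; shift to get the TCP header length in bytes */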
3389         tcp_hlen = th->th_off << 2;
3390
3391         /* This is used in the transmit desc in encap */
3392         *paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3393
3394         /* VLAN MACLEN IPLEN */
3395         if (mp->m_flags & M_VLANTAG) {
3396                 vtag = htole16(mp->m_pkthdr.ether_vlantag);
3397                 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3398         }
3399
3400         vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3401         vlan_macip_lens |= ip_hlen;
3402         TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3403
3404         /* ADV DTYPE TUCMD */
3405         type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3406         type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3407         TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3408
3409         /* MSS L4LEN IDX */
3410         mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3411         mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3412         TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3413
3414         TXD->seqnum_seed = htole32(0);
3415         tx_buffer->m_head = NULL;
3416         tx_buffer->eop_index = -1;
3417
3418         if (++ctxd == adapter->num_tx_desc)
3419                 ctxd = 0;
3420
3421         txr->tx_avail--;
3422         txr->next_avail_desc = ctxd;
3423         return TRUE;
3424 }
3425
3426 #ifdef IXGBE_FDIR
3427 /*
3428 ** This routine parses packet headers so that Flow
3429 ** Director can make a hashed filter table entry 
3430 ** allowing traffic flows to be identified and kept
3431 ** on the same cpu.  This would be a performance
3432 ** hit, but we only do it at IXGBE_FDIR_RATE of
3433 ** packets.
3434 */
3435 static void
3436 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3437 {
3438         struct adapter                  *adapter = txr->adapter;
3439         struct ix_queue                 *que;
3440         struct ip                       *ip;
3441         struct tcphdr                   *th;
3442         struct udphdr                   *uh;
3443         struct ether_vlan_header        *eh;
3444         union ixgbe_atr_hash_dword      input = {.dword = 0}; 
3445         union ixgbe_atr_hash_dword      common = {.dword = 0}; 
3446         int                             ehdrlen, ip_hlen;
3447         u16                             etype;
3448
3449         eh = mtod(mp, struct ether_vlan_header *);
3450         if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3451                 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3452                 etype = eh->evl_proto;
3453         } else {
3454                 ehdrlen = ETHER_HDR_LEN;
3455                 etype = eh->evl_encap_proto;
3456         }
3457
3458         /* Only handling IPv4 */
3459         if (etype != htons(ETHERTYPE_IP))
3460                 return;
3461
3462         ip = (struct ip *)(mp->m_data + ehdrlen);
3463         ip_hlen = ip->ip_hl << 2;
3464
3465         /* check if we're UDP or TCP */
3466         switch (ip->ip_p) {
3467         case IPPROTO_TCP:
3468                 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3469                 /* src and dst are inverted */
3470                 common.port.dst ^= th->th_sport;
3471                 common.port.src ^= th->th_dport;
3472                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3473                 break;
3474         case IPPROTO_UDP:
3475                 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3476                 /* src and dst are inverted */
3477                 common.port.dst ^= uh->uh_sport;
3478                 common.port.src ^= uh->uh_dport;
3479                 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3480                 break;
3481         default:
3482                 return;
3483         }
3484
3485         input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vlantag);
3486         if (mp->m_pkthdr.ether_vlantag)
3487                 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3488         else
3489                 common.flex_bytes ^= etype;
3490         common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3491
3492         que = &adapter->queues[txr->me];
3493         /*
3494         ** This assumes the Rx queue and Tx
3495         ** queue are bound to the same CPU
3496         */
3497         ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3498             input, common, que->msix);
3499 }
3500 #endif /* IXGBE_FDIR */
3501
3502 /**********************************************************************
3503  *
3504  *  Examine each tx_buffer in the used queue. If the hardware is done
3505  *  processing the packet then free associated resources. The
3506  *  tx_buffer is put back on the free queue.
3507  *
3508  **********************************************************************/
3509 static bool
3510 ixgbe_txeof(struct tx_ring *txr)
3511 {
3512         struct adapter  *adapter = txr->adapter;
3513         struct ifnet    *ifp = adapter->ifp;
3514         u32     first, last, done, processed;
3515         struct ixgbe_tx_buf *tx_buffer;
3516         struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
3517
3518         KKASSERT(lockstatus(&txr->tx_lock, curthread) != 0);
3519
3520 #ifdef DEV_NETMAP
3521         if (ifp->if_capenable & IFCAP_NETMAP) {
3522                 struct netmap_adapter *na = NA(ifp);
3523                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3524
3525                 tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
3526
3527                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3528                     BUS_DMASYNC_POSTREAD);
3529                 /*
3530                  * In netmap mode, all the work is done in the context
3531                  * of the client thread. Interrupt handlers only wake up
3532                  * clients, which may be sleeping on individual rings
3533                  * or on a global resource for all rings.
3534                  * To implement tx interrupt mitigation, we wake up the client
3535                  * thread roughly every half ring, even if the NIC interrupts
3536                  * more frequently. This is implemented as follows:
3537                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3538                  *   the slot that should wake up the thread (nkr_num_slots
3539                  *   means the user thread should not be woken up);
3540                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3541                  *   or the slot has the DD bit set.
3542                  *
3543                  * When the driver has separate locks, we need to
3544                  * release and re-acquire txlock to avoid deadlocks.
3545                  * XXX see if we can find a better way.
3546                  */
3547                 if (!netmap_mitigate ||
3548                     (kring->nr_kflags < kring->nkr_num_slots &&
3549                      tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
3550                         kring->nr_kflags = kring->nkr_num_slots;
3551                         selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3552                         IXGBE_TX_UNLOCK(txr);
3553                         IXGBE_CORE_LOCK(adapter);
3554                         selwakeuppri(&na->tx_si, PI_NET);
3555                         IXGBE_CORE_UNLOCK(adapter);
3556                         IXGBE_TX_LOCK(txr);
3557                 }
3558                 return FALSE;
3559         }
3560 #endif /* DEV_NETMAP */
3561
3562         if (txr->tx_avail == adapter->num_tx_desc) {
3563                 txr->queue_status = IXGBE_QUEUE_IDLE;
3564                 return FALSE;
3565         }
3566
3567         processed = 0;
3568         first = txr->next_to_clean;
3569         tx_buffer = &txr->tx_buffers[first];
3570         /* For cleanup we just use legacy struct */
3571         tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3572         last = tx_buffer->eop_index;
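        /* An eop_index of -1 means no completed packet ends at this slot yet */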
3573         if (last == -1)
3574                 return FALSE;
3575         eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3576
3577         /*
3578         ** Get the index of the first descriptor
3579         ** BEYOND the EOP and call that 'done'.
3580         ** I do this so the comparison in the
3581         ** inner while loop below can be simple
3582         */
3583         if (++last == adapter->num_tx_desc) last = 0;
3584         done = last;
3585
3586         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3587             BUS_DMASYNC_POSTREAD);
3588         /*
3589         ** Only the EOP descriptor of a packet now has the DD
3590         ** bit set, this is what we look for...
3591         */
3592         while (eop_desc->upper.fields.status & IXGBE_TXD_STAT_DD) {
3593                 /* We clean the range of the packet */
3594                 while (first != done) {
3595                         tx_desc->upper.data = 0;
3596                         tx_desc->lower.data = 0;
3597                         tx_desc->buffer_addr = 0;
3598                         ++txr->tx_avail;
3599                         ++processed;
3600
3601                         if (tx_buffer->m_head) {
3602                                 txr->bytes +=
3603                                     tx_buffer->m_head->m_pkthdr.len;
3604                                 bus_dmamap_sync(txr->txtag,
3605                                     tx_buffer->map,
3606                                     BUS_DMASYNC_POSTWRITE);
3607                                 bus_dmamap_unload(txr->txtag,
3608                                     tx_buffer->map);
3609                                 m_freem(tx_buffer->m_head);
3610                                 tx_buffer->m_head = NULL;
3611                                 tx_buffer->map = NULL;
3612                         }
3613                         tx_buffer->eop_index = -1;
3614                         txr->watchdog_time = ticks;
3615
3616                         if (++first == adapter->num_tx_desc)
3617                                 first = 0;
3618
3619                         tx_buffer = &txr->tx_buffers[first];
3620                         tx_desc =
3621                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3622                 }
3623                 ++txr->packets;
3624                 ++ifp->if_opackets;
3625                 /* See if there is more work now */
3626                 last = tx_buffer->eop_index;
3627                 if (last != -1) {
3628                         eop_desc =
3629                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3630                         /* Get next done point */
3631                         if (++last == adapter->num_tx_desc) last = 0;
3632                         done = last;
3633                 } else
3634                         break;
3635         }
3636         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3637             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3638
3639         txr->next_to_clean = first;
3640
3641         /*
3642         ** Watchdog calculation: we know there's work
3643         ** outstanding or the first return above would have
3644         ** been taken, so nothing processed for too long
3645         ** indicates a hang.
3646         */
3647         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3648                 txr->queue_status = IXGBE_QUEUE_HUNG;
3649
3650         /* With a minimum number of descriptors free, clear the depleted state bit. */
3651         if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
3652                 txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;
3653
3654         if (txr->tx_avail == adapter->num_tx_desc) {
3655                 txr->queue_status = IXGBE_QUEUE_IDLE;
3656                 return (FALSE);
3657         }
3658
3659         return TRUE;
3660 }
3661
3662 /*********************************************************************
3663  *
3664  *  Refresh mbuf buffers for RX descriptor rings
3665  *   - now keeps its own state so discards due to resource
3666  *     exhaustion are unnecessary; if an mbuf cannot be obtained
3667  *     the routine just returns, keeping its placeholder, so it can
3668  *     simply be called again later to retry.
3669  *
3670  **********************************************************************/
3671 static void
3672 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3673 {
3674         struct adapter          *adapter = rxr->adapter;
3675         bus_dma_segment_t       hseg[1];
3676         bus_dma_segment_t       pseg[1];
3677         struct ixgbe_rx_buf     *rxbuf;
3678         struct mbuf             *mh, *mp;
3679         int                     i, j, nsegs, error;
3680         bool                    refreshed = FALSE;
3681
3682         i = j = rxr->next_to_refresh;
3683         /* Control the loop with one beyond */
3684         if (++j == adapter->num_rx_desc)
3685                 j = 0;
3686
3687         while (j != limit) {
3688                 rxbuf = &rxr->rx_buffers[i];
3689                 if (rxr->hdr_split == FALSE)
3690                         goto no_split;
3691
3692                 if (rxbuf->m_head == NULL) {
3693                         mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3694                         if (mh == NULL)
3695                                 goto update;
3696                 } else
3697                         mh = rxbuf->m_head;
3698
3699                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3700                 mh->m_len = MHLEN;
3701                 mh->m_flags |= M_PKTHDR;
3702                 /* Get the memory mapping */
3703                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
3704                     rxbuf->hmap, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3705                 if (error != 0) {
3706                         kprintf("Refresh mbufs: hdr dmamap load"
3707                             " failure - %d\n", error);
3708                         m_free(mh);
3709                         rxbuf->m_head = NULL;
3710                         goto update;
3711                 }
3712                 rxbuf->m_head = mh;
3713                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3714                     BUS_DMASYNC_PREREAD);
3715                 rxr->rx_base[i].read.hdr_addr =
3716                     htole64(hseg[0].ds_addr);
3717
3718 no_split:
3719                 if (rxbuf->m_pack == NULL) {
3720                         mp = m_getjcl(MB_DONTWAIT, MT_DATA,
3721                             M_PKTHDR, adapter->rx_mbuf_sz);
3722                         if (mp == NULL)
3723                                 goto update;
3724                 } else
3725                         mp = rxbuf->m_pack;
3726
3727                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3728                 /* Get the memory mapping */
3729                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
3730                     rxbuf->pmap, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3731                 if (error != 0) {
3732                         kprintf("Refresh mbufs: payload dmamap load"
3733                             " failure - %d\n", error);
3734                         m_free(mp);
3735                         rxbuf->m_pack = NULL;
3736                         goto update;
3737                 }
3738                 rxbuf->m_pack = mp;
3739                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3740                     BUS_DMASYNC_PREREAD);
3741                 rxr->rx_base[i].read.pkt_addr =
3742                     htole64(pseg[0].ds_addr);
3743
3744                 refreshed = TRUE;
3745                 /* Next is precalculated */
3746                 i = j;
3747                 rxr->next_to_refresh = i;
3748                 if (++j == adapter->num_rx_desc)
3749                         j = 0;
3750         }
3751 update:
3752         if (refreshed) /* Update hardware tail index */
3753                 IXGBE_WRITE_REG(&adapter->hw,
3754                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3755         return;
3756 }
3757
3758 /*********************************************************************
3759  *
3760  *  Allocate memory for rx_buffer structures. Since we use one
3761  *  rx_buffer per received packet, the maximum number of rx_buffer's
3762  *  that we'll need is equal to the number of receive descriptors
3763  *  that we've allocated.
3764  *
3765  **********************************************************************/
3766 static int
3767 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3768 {
3769         struct  adapter         *adapter = rxr->adapter;
3770         device_t                dev = adapter->dev;
3771         struct ixgbe_rx_buf     *rxbuf;
3772         int                     i, bsize, error;
3773
3774         bsize = sizeof(struct ixgbe_rx_buf) * adapter->num_rx_desc;
3775         if (!(rxr->rx_buffers =
3776             (struct ixgbe_rx_buf *) kmalloc(bsize,
3777             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3778                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3779                 error = ENOMEM;
3780                 goto fail;
3781         }
3782
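        /*
         * Two DMA tags: a small (MSIZE) one for header-split header mbufs
         * and a large one (up to MJUM16BYTES) for payload clusters.
         */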
3783         if ((error = bus_dma_tag_create(NULL,   /* parent */
3784                                    1, 0,        /* alignment, bounds */
3785                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3786                                    BUS_SPACE_MAXADDR,   /* highaddr */
3787                                    NULL, NULL,          /* filter, filterarg */
3788                                    MSIZE,               /* maxsize */
3789                                    1,                   /* nsegments */
3790                                    MSIZE,               /* maxsegsize */
3791                                    0,                   /* flags */
3792                                    &rxr->htag))) {
3793                 device_printf(dev, "Unable to create RX header DMA tag\n");
3794                 goto fail;
3795         }
3796
3797         if ((error = bus_dma_tag_create(NULL,   /* parent */
3798                                    1, 0,        /* alignment, bounds */
3799                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3800                                    BUS_SPACE_MAXADDR,   /* highaddr */
3801                                    NULL, NULL,          /* filter, filterarg */
3802                                    MJUM16BYTES,         /* maxsize */
3803                                    1,                   /* nsegments */
3804                                    MJUM16BYTES,         /* maxsegsize */
3805                                    0,                   /* flags */
3806                                    &rxr->ptag))) {
3807                 device_printf(dev, "Unable to create RX payload DMA tag\n");
3808                 goto fail;
3809         }
3810
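        /* Create a header map and a payload map for every descriptor */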
3811         for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3812                 rxbuf = &rxr->rx_buffers[i];
3813                 error = bus_dmamap_create(rxr->htag,
3814                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3815                 if (error) {
3816                         device_printf(dev, "Unable to create RX head map\n");
3817                         goto fail;
3818                 }
3819                 error = bus_dmamap_create(rxr->ptag,
3820                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3821                 if (error) {
3822                         device_printf(dev, "Unable to create RX pkt map\n");
3823                         goto fail;
3824                 }
3825         }
3826
3827         return (0);
3828
3829 fail:
3830         /* Frees all, but can handle partial completion */
3831         ixgbe_free_receive_structures(adapter);
3832         return (error);
3833 }
3834
3835 /*
3836 ** Used to detect a descriptor that has
3837 ** been merged by Hardware RSC.
3838 */
3839 static inline u32
3840 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3841 {
3842         return (le32toh(rx->wb.lower.lo_dword.data) &
3843             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3844 }
3845
3846 /*********************************************************************
3847  *
3848  *  Initialize Hardware RSC (LRO) feature on 82599
3849  *  for an RX ring, this is toggled by the LRO capability
3850  *  even though it is transparent to the stack.
3851  *
3852  **********************************************************************/
3853 #if 0   /* NET_LRO */
3854 static void
3855 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3856 {
3857         struct  adapter         *adapter = rxr->adapter;
3858         struct  ixgbe_hw        *hw = &adapter->hw;
3859         u32                     rscctrl, rdrxctl;
3860
3861         rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3862         rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3863 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3864         if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3865 #endif /* DEV_NETMAP */
3866         rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3867         rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3868         IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3869
3870         rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3871         rscctrl |= IXGBE_RSCCTL_RSCEN;
3872         /*
3873         ** Limit the total number of descriptors that
3874         ** can be combined, so it does not exceed 64K
3875         */
3876         if (adapter->rx_mbuf_sz == MCLBYTES)
3877                 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3878         else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
3879                 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3880         else if (adapter->rx_mbuf_sz == MJUM9BYTES)
3881                 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3882         else  /* Using 16K cluster */
3883                 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
3884
3885         IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3886
3887         /* Enable TCP header recognition */
3888         IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3889             (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3890             IXGBE_PSRTYPE_TCPHDR));
3891
3892         /* Disable RSC for ACK packets */
3893         IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3894             (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3895
3896         rxr->hw_rsc = TRUE;
3897 }
3898 #endif
3899
3900 static void     
3901 ixgbe_free_receive_ring(struct rx_ring *rxr)
3902 {
3903         struct  adapter         *adapter;
3904         struct ixgbe_rx_buf       *rxbuf;
3905         int i;
3906
3907         adapter = rxr->adapter;
3908         for (i = 0; i < adapter->num_rx_desc; i++) {
3909                 rxbuf = &rxr->rx_buffers[i];
3910                 if (rxbuf->m_head != NULL) {
3911                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3912                             BUS_DMASYNC_POSTREAD);
3913                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3914                         rxbuf->m_head->m_flags |= M_PKTHDR;
3915                         m_freem(rxbuf->m_head);
3916                 }
3917                 if (rxbuf->m_pack != NULL) {
3918                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3919                             BUS_DMASYNC_POSTREAD);
3920                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3921                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3922                         m_freem(rxbuf->m_pack);
3923                 }
3924                 rxbuf->m_head = NULL;
3925                 rxbuf->m_pack = NULL;
3926         }
3927 }
3928
3929
3930 /*********************************************************************
3931  *
3932  *  Initialize a receive ring and its buffers.
3933  *
3934  **********************************************************************/
3935 static int
3936 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3937 {
3938         struct  adapter         *adapter;
3939         struct ifnet            *ifp;
3940         device_t                dev;
3941         struct ixgbe_rx_buf     *rxbuf;
3942         bus_dma_segment_t       pseg[1], hseg[1];
3943 #if 0   /* NET_LRO */
3944         struct lro_ctrl         *lro = &rxr->lro;
3945 #endif
3946         int                     rsize, nsegs, error = 0;
3947 #ifdef DEV_NETMAP
3948         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3949         struct netmap_slot *slot;
3950 #endif /* DEV_NETMAP */
3951
3952         adapter = rxr->adapter;
3953         ifp = adapter->ifp;
3954         dev = adapter->dev;
3955
3956         /* Clear the ring contents */
3957         IXGBE_RX_LOCK(rxr);
3958 #ifdef DEV_NETMAP
3959         /* same as in ixgbe_setup_transmit_ring() */
3960         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3961 #endif /* DEV_NETMAP */
3962         rsize = roundup2(adapter->num_rx_desc *
3963             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3964         bzero((void *)rxr->rx_base, rsize);
3965
3966         /* Free current RX buffer structs and their mbufs */
3967         ixgbe_free_receive_ring(rxr);
3968
3969         /* Configure header split? */
3970         if (ixgbe_header_split)
3971                 rxr->hdr_split = TRUE;
3972
3973         /* Now replenish the mbufs */
3974         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3975                 struct mbuf     *mh, *mp;
3976
3977                 rxbuf = &rxr->rx_buffers[j];
3978 #ifdef DEV_NETMAP
3979                 /*
3980                  * In netmap mode, fill the map and set the buffer
3981                  * address in the NIC ring, considering the offset
3982                  * between the netmap and NIC rings (see comment in
3983                  * ixgbe_setup_transmit_ring() ). No need to allocate
3984                  * an mbuf, so end the block with a continue;
3985                  */
3986                 if (slot) {
3987                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
3988                         uint64_t paddr;
3989                         void *addr;
3990
3991                         addr = PNMB(slot + sj, &paddr);
3992                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
3993                         /* Update descriptor */
3994                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
3995                         continue;
3996                 }
3997 #endif /* DEV_NETMAP */
3998                 /*
3999                 ** Don't allocate header mbufs if we're not
4000                 ** doing header split; it's wasteful.
4001                 */
4002                 if (rxr->hdr_split == FALSE)
4003                         goto skip_head;
4004
4005                 /* First the header */
4006                 rxbuf->m_head = m_gethdr(MB_DONTWAIT, MT_DATA);
4007                 if (rxbuf->m_head == NULL) {
4008                         error = ENOBUFS;
4009                         goto fail;
4010                 }
4011                 m_adj(rxbuf->m_head, ETHER_ALIGN);
4012                 mh = rxbuf->m_head;
4013                 mh->m_len = mh->m_pkthdr.len = MHLEN;
4014                 mh->m_flags |= M_PKTHDR;
4015                 /* Get the memory mapping */
4016                 error = bus_dmamap_load_mbuf_segment(rxr->htag,