ixgbe: Do not use mbuf jumbo clusters
[dragonfly.git] / sys / dev / netif / ixgbe / ixgbe.c
1 /******************************************************************************
2
3   Copyright (c) 2001-2012, Intel Corporation 
4   All rights reserved.
5   
6   Redistribution and use in source and binary forms, with or without 
7   modification, are permitted provided that the following conditions are met:
8   
9    1. Redistributions of source code must retain the above copyright notice, 
10       this list of conditions and the following disclaimer.
11   
12    2. Redistributions in binary form must reproduce the above copyright 
13       notice, this list of conditions and the following disclaimer in the 
14       documentation and/or other materials provided with the distribution.
15   
16    3. Neither the name of the Intel Corporation nor the names of its 
17       contributors may be used to endorse or promote products derived from 
18       this software without specific prior written permission.
19   
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31
32 ******************************************************************************/
33 /*$FreeBSD: src/sys/dev/ixgbe/ixgbe.c,v 1.70 2012/07/05 20:51:44 jfv Exp $*/
34
35 #include "opt_inet.h"
36 #include "opt_inet6.h"
37
38 #include "ixgbe.h"
39
/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
/* Global debug knob; read by debug printing paths elsewhere in the driver. */
int             ixgbe_display_debug_stats = 0;

/*********************************************************************
 *  Driver version
 *********************************************************************/
/* Reported in the probe string (see ixgbe_probe()). */
char ixgbe_driver_version[] = "2.4.8";
49
50 /*********************************************************************
51  *  PCI Device ID Table
52  *
53  *  Used by probe to select devices to load on
54  *  Last field stores an index into ixgbe_strings
55  *  Last entry must be all 0s
56  *
57  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
58  *********************************************************************/
59
static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
{
	/* 82598 family */
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
	/* 82599 family */
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
	/* X540 family */
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};
89
90 /*********************************************************************
91  *  Table of branding strings
92  *********************************************************************/
93
/* Branding strings; ixgbe_vendor_info_t.index selects an entry. */
static char    *ixgbe_strings[] = {
	"Intel(R) PRO/10GbE PCI-Express Network Driver"
};
97
/*********************************************************************
 *  Function prototypes
 *********************************************************************/
/* Newbus entry points */
static int      ixgbe_probe(device_t);
static int      ixgbe_attach(device_t);
static int      ixgbe_detach(device_t);
static int      ixgbe_shutdown(device_t);
/* ifnet transmit entry points */
static void     ixgbe_start(struct ifnet *);
static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
#if 0 /* __FreeBSD_version >= 800000 */
static int      ixgbe_mq_start(struct ifnet *, struct mbuf *);
static int      ixgbe_mq_start_locked(struct ifnet *,
                    struct tx_ring *, struct mbuf *);
static void     ixgbe_qflush(struct ifnet *);
#endif
/* ifnet control and media */
static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static void     ixgbe_init(void *);
static void     ixgbe_init_locked(struct adapter *);
static void     ixgbe_stop(void *);
static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
static int      ixgbe_media_change(struct ifnet *);
/* Resource allocation / hardware bring-up */
static void     ixgbe_identify_hardware(struct adapter *);
static int      ixgbe_allocate_pci_resources(struct adapter *);
static int      ixgbe_allocate_msix(struct adapter *);
static int      ixgbe_allocate_legacy(struct adapter *);
static int      ixgbe_allocate_queues(struct adapter *);
static int      ixgbe_setup_msix(struct adapter *);
static void     ixgbe_free_pci_resources(struct adapter *);
static void     ixgbe_local_timer(void *);
static int      ixgbe_setup_interface(device_t, struct adapter *);
static void     ixgbe_config_link(struct adapter *);

/* Transmit ring setup/teardown */
static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
static int      ixgbe_setup_transmit_structures(struct adapter *);
static void     ixgbe_setup_transmit_ring(struct tx_ring *);
static void     ixgbe_initialize_transmit_units(struct adapter *);
static void     ixgbe_free_transmit_structures(struct adapter *);
static void     ixgbe_free_transmit_buffers(struct tx_ring *);

/* Receive ring setup/teardown */
static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
static int      ixgbe_setup_receive_structures(struct adapter *);
static int      ixgbe_setup_receive_ring(struct rx_ring *);
static void     ixgbe_initialize_receive_units(struct adapter *);
static void     ixgbe_free_receive_structures(struct adapter *);
static void     ixgbe_free_receive_buffers(struct rx_ring *);
#if 0   /* NET_LRO */
static void     ixgbe_setup_hw_rsc(struct rx_ring *);
#endif

/* Interrupts, datapath completion, and misc helpers */
static void     ixgbe_enable_intr(struct adapter *);
static void     ixgbe_disable_intr(struct adapter *);
static void     ixgbe_update_stats_counters(struct adapter *);
static void     ixgbe_txeof(struct tx_ring *);
static void     ixgbe_rxeof(struct ix_queue *, int);
static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void     ixgbe_set_promisc(struct adapter *);
static void     ixgbe_set_multi(struct adapter *);
static void     ixgbe_update_link_status(struct adapter *);
static void     ixgbe_refresh_mbufs(struct rx_ring *, int);
static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int      ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
static int      ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
static int      ixgbe_dma_malloc(struct adapter *, bus_size_t,
                    struct ixgbe_dma_alloc *, int);
static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
static void     ixgbe_add_rx_process_limit(struct adapter *, const char *,
                    const char *, int *, int);
static bool     ixgbe_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool     ixgbe_tso_setup(struct tx_ring *, struct mbuf *, u32 *, u32 *);
static int      ixgbe_tso_pullup(struct tx_ring *, struct mbuf **);
static void     ixgbe_add_sysctl(struct adapter *);
static void     ixgbe_set_eitr(struct adapter *, int, int);
static int      ixgbe_sysctl_intr_rate(SYSCTL_HANDLER_ARGS);
static void     ixgbe_set_ivar(struct adapter *, u8, u8, s8);
static void     ixgbe_configure_ivars(struct adapter *);
static u8 *     ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);

/* VLAN hardware filter maintenance */
static void     ixgbe_setup_vlan_hw_support(struct adapter *);
static void     ixgbe_register_vlan(void *, struct ifnet *, u16);
static void     ixgbe_unregister_vlan(void *, struct ifnet *, u16);

static void     ixgbe_add_hw_stats(struct adapter *adapter);

static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
                    struct mbuf *, u32);

/* Support for pluggable optic modules */
static bool     ixgbe_sfp_probe(struct adapter *);
static void     ixgbe_setup_optics(struct adapter *);

/* Legacy (single vector) interrupt handler */
static void     ixgbe_legacy_irq(void *);

/* The MSI/X Interrupt handlers */
static void     ixgbe_msix_que(void *);
static void     ixgbe_msix_link(void *);

/* Deferred interrupt tasklets */
static void     ixgbe_handle_msf(void *, int);
static void     ixgbe_handle_mod(void *, int);

#ifdef IXGBE_FDIR
static void     ixgbe_atr(struct tx_ring *, struct mbuf *);
static void     ixgbe_reinit_fdir(void *, int);
#endif
205
206 /*********************************************************************
207  *  FreeBSD Device Interface Entry Points
208  *********************************************************************/
209
/* Newbus device interface dispatch table. */
static device_method_t ixgbe_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, ixgbe_probe),
	DEVMETHOD(device_attach, ixgbe_attach),
	DEVMETHOD(device_detach, ixgbe_detach),
	DEVMETHOD(device_shutdown, ixgbe_shutdown),
	{0, 0}
};

/* Driver declaration: name "ix", per-instance softc is struct adapter. */
static driver_t ixgbe_driver = {
	"ix", ixgbe_methods, sizeof(struct adapter),
};

devclass_t ixgbe_devclass;
DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);

MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
228
229 /*
230 ** TUNEABLE PARAMETERS:
231 */
232
/* How many packets rxeof tries to clean at a time */
static int ixgbe_rx_process_limit = 128;
TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);

/*
** Smart speed setting, default to on
** this only works as a compile option
** right now as it's applied during attach; set
** this to 'ixgbe_smart_speed_off' to
** disable.
*/
static int ixgbe_smart_speed = ixgbe_smart_speed_on;

/* Allow MSI (single-vector) interrupts; consulted when MSI-X is unavailable. */
static int ixgbe_msi_enable = 1;
TUNABLE_INT("hw.ixgbe.msi.enable", &ixgbe_msi_enable);

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int ixgbe_enable_msix = 1;
TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);

/*
 * Header split: this causes the hardware to DMA
 * the header into a separate mbuf from the payload,
 * it can be a performance win in some workloads, but
 * in others it actually hurts; it's off by default.
 */
static int ixgbe_header_split = FALSE;
TUNABLE_INT("hw.ixgbe.hdr_split", &ixgbe_header_split);

/*
 * Number of Queues, can be set to 0,
 * it then autoconfigures based on the
 * number of cpus with a max of 8. This
 * can be overridden manually here.
 */
static int ixgbe_num_queues = 0;
TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);

/*
** Number of TX descriptors per ring,
** setting higher than RX as this seems
** the better performing choice.
*/
static int ixgbe_txd = PERFORM_TXD;
TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);

/* Number of RX descriptors per ring */
static int ixgbe_rxd = PERFORM_RXD;
TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);

/* Keep running tab on them for sanity check */
/* Incremented once per successful probe; used for mbuf-budget sanity math. */
static int ixgbe_total_ports;

#ifdef IXGBE_FDIR
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool, this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
/*
** Flow Director actually 'steals'
** part of the packet buffer as its
** filter pool, this variable controls
** how much it uses:
**  0 = 64K, 1 = 128K, 2 = 256K
*/
static int fdir_pballoc = 1;
#endif
309
310 #ifdef DEV_NETMAP
311 /*
312  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
313  * be a reference on how to implement netmap support in a driver.
314  * Additional comments are in ixgbe_netmap.h .
315  *
316  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
317  * that extend the standard driver.
318  */
319 #include <dev/netmap/ixgbe_netmap.h>
320 #endif /* DEV_NETMAP */
321
322 /*********************************************************************
323  *  Device identification routine
324  *
325  *  ixgbe_probe determines if the driver should be loaded on
326  *  adapter based on PCI vendor/device id of the adapter.
327  *
328  *  return BUS_PROBE_DEFAULT on success, positive on failure
329  *********************************************************************/
330
331 static int
332 ixgbe_probe(device_t dev)
333 {
334         ixgbe_vendor_info_t *ent;
335
336         u16     pci_vendor_id = 0;
337         u16     pci_device_id = 0;
338         u16     pci_subvendor_id = 0;
339         u16     pci_subdevice_id = 0;
340         char    adapter_name[256];
341
342         INIT_DEBUGOUT("ixgbe_probe: begin");
343
344         pci_vendor_id = pci_get_vendor(dev);
345         if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
346                 return (ENXIO);
347
348         pci_device_id = pci_get_device(dev);
349         pci_subvendor_id = pci_get_subvendor(dev);
350         pci_subdevice_id = pci_get_subdevice(dev);
351
352         ent = ixgbe_vendor_info_array;
353         while (ent->vendor_id != 0) {
354                 if ((pci_vendor_id == ent->vendor_id) &&
355                     (pci_device_id == ent->device_id) &&
356
357                     ((pci_subvendor_id == ent->subvendor_id) ||
358                      (ent->subvendor_id == 0)) &&
359
360                     ((pci_subdevice_id == ent->subdevice_id) ||
361                      (ent->subdevice_id == 0))) {
362                         ksprintf(adapter_name, "%s, Version - %s",
363                                 ixgbe_strings[ent->index],
364                                 ixgbe_driver_version);
365                         device_set_desc_copy(dev, adapter_name);
366                         ++ixgbe_total_ports;
367                         return (BUS_PROBE_DEFAULT);
368                 }
369                 ent++;
370         }
371         return (ENXIO);
372 }
373
374 /*********************************************************************
375  *  Device initialization routine
376  *
377  *  The attach entry point is called when the driver is being loaded.
378  *  This routine identifies the type of hardware, allocates all resources
379  *  and initializes the hardware.
380  *
381  *  return 0 on success, positive on failure
382  *********************************************************************/
383
384 static int
385 ixgbe_attach(device_t dev)
386 {
387         struct adapter *adapter;
388         struct ixgbe_hw *hw;
389         int             error = 0;
390         u16             csum;
391         u32             ctrl_ext;
392
393         INIT_DEBUGOUT("ixgbe_attach: begin");
394
395         if (resource_disabled("ixgbe", device_get_unit(dev))) {
396                 device_printf(dev, "Disabled by device hint\n");
397                 return (ENXIO);
398         }
399
400         /* Allocate, clear, and link in our adapter structure */
401         adapter = device_get_softc(dev);
402         adapter->dev = adapter->osdep.dev = dev;
403         hw = &adapter->hw;
404
405         /* Core Lock Init*/
406         IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
407
408         /* Set up the timer callout */
409         callout_init_mp(&adapter->timer);
410
411         /* Determine hardware revision */
412         ixgbe_identify_hardware(adapter);
413
414         /* Enable bus mastering */
415         pci_enable_busmaster(dev);
416
417         /* Do base PCI setup - map BAR0 */
418         if (ixgbe_allocate_pci_resources(adapter)) {
419                 device_printf(dev, "Allocation of PCI resources failed\n");
420                 error = ENXIO;
421                 goto err_out;
422         }
423
424         /* Do descriptor calc and sanity checks */
425         if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
426             ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
427                 device_printf(dev, "TXD config issue, using default!\n");
428                 adapter->num_tx_desc = DEFAULT_TXD;
429         } else
430                 adapter->num_tx_desc = ixgbe_txd;
431
432         /*
433         ** With many RX rings it is easy to exceed the
434         ** system mbuf allocation. Tuning nmbclusters
435         ** can alleviate this.
436         */
437         if (nmbclusters > 0 ) {
438                 int s;
439                 s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
440                 if (s > nmbclusters) {
441                         device_printf(dev, "RX Descriptors exceed "
442                             "system mbuf max, using default instead!\n");
443                         ixgbe_rxd = DEFAULT_RXD;
444                 }
445         }
446
447         if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
448             ixgbe_rxd < MIN_TXD || ixgbe_rxd > MAX_TXD) {
449                 device_printf(dev, "RXD config issue, using default!\n");
450                 adapter->num_rx_desc = DEFAULT_RXD;
451         } else
452                 adapter->num_rx_desc = ixgbe_rxd;
453
454         /* Allocate our TX/RX Queues */
455         if (ixgbe_allocate_queues(adapter)) {
456                 error = ENOMEM;
457                 goto err_out;
458         }
459
460         /* Allocate multicast array memory. */
461         adapter->mta = kmalloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
462             MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
463         if (adapter->mta == NULL) {
464                 device_printf(dev, "Can not allocate multicast setup array\n");
465                 error = ENOMEM;
466                 goto err_late;
467         }
468
469         /* Initialize the shared code */
470         error = ixgbe_init_shared_code(hw);
471         if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
472                 /*
473                 ** No optics in this port, set up
474                 ** so the timer routine will probe 
475                 ** for later insertion.
476                 */
477                 adapter->sfp_probe = TRUE;
478                 error = 0;
479         } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
480                 device_printf(dev,"Unsupported SFP+ module detected!\n");
481                 error = EIO;
482                 goto err_late;
483         } else if (error) {
484                 device_printf(dev,"Unable to initialize the shared code\n");
485                 error = EIO;
486                 goto err_late;
487         }
488
489         /* Make sure we have a good EEPROM before we read from it */
490         if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
491                 device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
492                 error = EIO;
493                 goto err_late;
494         }
495
496         error = ixgbe_init_hw(hw);
497         switch (error) {
498         case IXGBE_ERR_EEPROM_VERSION:
499                 device_printf(dev, "This device is a pre-production adapter/"
500                     "LOM.  Please be aware there may be issues associated "
501                     "with your hardware.\n If you are experiencing problems "
502                     "please contact your Intel or hardware representative "
503                     "who provided you with this hardware.\n");
504                 break;
505         case IXGBE_ERR_SFP_NOT_SUPPORTED:
506                 device_printf(dev,"Unsupported SFP+ Module\n");
507                 error = EIO;
508                 device_printf(dev,"Hardware Initialization Failure\n");
509                 goto err_late;
510         case IXGBE_ERR_SFP_NOT_PRESENT:
511                 device_printf(dev,"No SFP+ Module found\n");
512                 /* falls thru */
513         default:
514                 break;
515         }
516
517         /* Detect and set physical type */
518         ixgbe_setup_optics(adapter);
519
520         if ((adapter->msix > 1) && (ixgbe_enable_msix)) {
521                 adapter->intr_type = PCI_INTR_TYPE_MSIX;
522                 error = ixgbe_allocate_msix(adapter); 
523         } else {
524                 error = ixgbe_allocate_legacy(adapter); 
525         }
526         if (error) 
527                 goto err_late;
528
529         /* Setup OS specific network interface */
530         if (ixgbe_setup_interface(dev, adapter) != 0)
531                 goto err_late;
532
533         /* Add sysctl tree */
534         ixgbe_add_sysctl(adapter);
535
536         /* Initialize statistics */
537         ixgbe_update_stats_counters(adapter);
538
539         /* Register for VLAN events */
540         adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
541             ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
542         adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
543             ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
544
545         /* Print PCIE bus type/speed/width info */
546         ixgbe_get_bus_info(hw);
547         device_printf(dev,"PCI Express Bus: Speed %s %s\n",
548             ((hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0Gb/s":
549             (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5Gb/s":"Unknown"),
550             (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
551             (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
552             (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
553             ("Unknown"));
554
555         if ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
556             (hw->bus.speed == ixgbe_bus_speed_2500)) {
557                 device_printf(dev, "PCI-Express bandwidth available"
558                     " for this card\n     is not sufficient for"
559                     " optimal performance.\n");
560                 device_printf(dev, "For optimal performance a x8 "
561                     "PCIE, or x4 PCIE 2 slot is required.\n");
562         }
563
564         /* let hardware know driver is loaded */
565         ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
566         ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
567         IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
568
569         ixgbe_add_hw_stats(adapter);
570
571 #ifdef DEV_NETMAP
572         ixgbe_netmap_attach(adapter);
573 #endif /* DEV_NETMAP */
574         INIT_DEBUGOUT("ixgbe_attach: end");
575         return (0);
576 err_late:
577         ixgbe_free_transmit_structures(adapter);
578         ixgbe_free_receive_structures(adapter);
579 err_out:
580         if (adapter->ifp != NULL)
581                 if_free(adapter->ifp);
582         ixgbe_free_pci_resources(adapter);
583         kfree(adapter->mta, M_DEVBUF);
584         return (error);
585
586 }
587
588 /*********************************************************************
589  *  Device removal routine
590  *
591  *  The detach entry point is called when the driver is being removed.
592  *  This routine stops the adapter and deallocates all the resources
593  *  that were allocated for driver operation.
594  *
595  *  return 0 on success, positive on failure
596  *********************************************************************/
597
/*
 * Device removal routine.
 *
 * Stops the adapter and releases, in dependency order, everything
 * ixgbe_attach() acquired: taskqueues, event handlers, the ifnet,
 * PCI resources, ring structures, and the core lock.
 *
 * Returns 0 on success, EBUSY if VLANs are still configured on top
 * of the interface.
 */
static int
ixgbe_detach(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	u32	ctrl_ext;

	INIT_DEBUGOUT("ixgbe_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunks != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	/* Quiesce the hardware before tearing anything down. */
	IXGBE_CORE_LOCK(adapter);
	ixgbe_stop(adapter);
	IXGBE_CORE_UNLOCK(adapter);

	/* Drain the Link queue: let in-flight deferred tasks finish. */
	if (adapter->tq) {
		taskqueue_drain(adapter->tq, &adapter->mod_task);
		taskqueue_drain(adapter->tq, &adapter->msf_task);
#ifdef IXGBE_FDIR
		taskqueue_drain(adapter->tq, &adapter->fdir_task);
#endif
		taskqueue_free(adapter->tq);
	}

	/* let hardware know driver is unloading */
	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	/* Detach from the network stack before freeing the ifnet. */
	ether_ifdetach(adapter->ifp);
	callout_stop(&adapter->timer);
#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
	ixgbe_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(adapter->ifp);

	ixgbe_free_transmit_structures(adapter);
	ixgbe_free_receive_structures(adapter);
	kfree(adapter->mta, M_DEVBUF);
	sysctl_ctx_free(&adapter->sysctl_ctx);

	IXGBE_CORE_LOCK_DESTROY(adapter);
	return (0);
}
654
655 /*********************************************************************
656  *
657  *  Shutdown entry point
658  *
659  **********************************************************************/
660
661 static int
662 ixgbe_shutdown(device_t dev)
663 {
664         struct adapter *adapter = device_get_softc(dev);
665         IXGBE_CORE_LOCK(adapter);
666         ixgbe_stop(adapter);
667         IXGBE_CORE_UNLOCK(adapter);
668         return (0);
669 }
670
671
672 /*********************************************************************
673  *  Transmit entry point
674  *
675  *  ixgbe_start is called by the stack to initiate a transmit.
676  *  The driver will remain in this routine as long as there are
677  *  packets to transmit and transmit resources are available.
678  *  In case resources are not available stack is notified and
679  *  the packet is requeued.
680  **********************************************************************/
681
682 static void
683 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
684 {
685         struct mbuf    *m_head;
686         struct adapter *adapter = txr->adapter;
687
688         IXGBE_TX_LOCK_ASSERT(txr);
689
690         if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
691                 return;
692
693         if (!adapter->link_active) {
694                 ifq_purge(&ifp->if_snd);
695                 return;
696         }
697
698         while (!ifq_is_empty(&ifp->if_snd)) {
699                 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE) {
700                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
701                         break;
702                 }
703
704                 m_head = ifq_dequeue(&ifp->if_snd, NULL);
705                 if (m_head == NULL)
706                         break;
707
708                 if (ixgbe_xmit(txr, &m_head)) {
709 #if 0 /* XXX: prepend to an ALTQ queue ? */
710                         if (m_head != NULL)
711                                 IF_PREPEND(&ifp->if_snd, m_head);
712 #endif
713                         if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
714                                 txr->queue_status |= IXGBE_QUEUE_DEPLETED;
715                         break;
716                 }
717                 /* Send a copy of the frame to the BPF listener */
718                 ETHER_BPF_MTAP(ifp, m_head);
719
720                 /* Set watchdog on */
721                 txr->watchdog_time = ticks;
722                 txr->queue_status = IXGBE_QUEUE_WORKING;
723
724         }
725         return;
726 }
727
728 /*
729  * Legacy TX start - called by the stack, this
730  * always uses the first tx ring, and should
731  * not be used with multiqueue tx enabled.
732  */
733 static void
734 ixgbe_start(struct ifnet *ifp)
735 {
736         struct adapter *adapter = ifp->if_softc;
737         struct tx_ring  *txr = adapter->tx_rings;
738
739         if (ifp->if_flags & IFF_RUNNING) {
740                 IXGBE_TX_LOCK(txr);
741                 ixgbe_start_locked(txr, ifp);
742                 IXGBE_TX_UNLOCK(txr);
743         }
744         return;
745 }
746
747 #if 0 /* __FreeBSD_version >= 800000 */
/*
** Multiqueue Transmit driver
**
** NOTE: compiled out (#if 0) in this DragonFly port; retained as a
** reference from the FreeBSD driver.  Uses FreeBSD buf_ring (drbr)
** APIs that are not available here -- verify before re-enabling.
*/
static int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter  *adapter = ifp->if_softc;
	struct ix_queue *que;
	struct tx_ring  *txr;
	int             i = 0, err = 0;

	/* Which queue to use: flow id when present, else current cpu. */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
	else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	/*
	 * Try the ring lock; on contention or a depleted ring, fall back
	 * to enqueueing on the buf_ring and kicking the queue task.
	 */
	if (((txr->queue_status & IXGBE_QUEUE_DEPLETED) == 0) &&
	    IXGBE_TX_TRYLOCK(txr)) {
		err = ixgbe_mq_start_locked(ifp, txr, m);
		IXGBE_TX_UNLOCK(txr);
	} else {
		err = drbr_enqueue(ifp, txr->br, m);
		taskqueue_enqueue(que->tq, &que->que_task);
	}

	return (err);
}
780
781 static int
782 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
783 {
784         struct adapter  *adapter = txr->adapter;
785         struct mbuf     *next;
786         int             enqueued, err = 0;
787
788         if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
789             (txr->queue_status == IXGBE_QUEUE_DEPLETED) ||
790             adapter->link_active == 0) {
791                 if (m != NULL)
792                         err = drbr_enqueue(ifp, txr->br, m);
793                 return (err);
794         }
795
796         enqueued = 0;
797         if (m == NULL) {
798                 next = drbr_dequeue(ifp, txr->br);
799         } else if (drbr_needs_enqueue(ifp, txr->br)) {
800                 if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
801                         return (err);
802                 next = drbr_dequeue(ifp, txr->br);
803         } else
804                 next = m;
805
806         /* Process the queue */
807         while (next != NULL) {
808                 if ((err = ixgbe_xmit(txr, &next)) != 0) {
809                         if (next != NULL)
810                                 err = drbr_enqueue(ifp, txr->br, next);
811                         break;
812                 }
813                 enqueued++;
814                 drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
815                 /* Send a copy of the frame to the BPF listener */
816                 ETHER_BPF_MTAP(ifp, next);
817                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
818                         break;
819                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
820                         ixgbe_txeof(txr);
821                 if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD) {
822                         txr->queue_status |= IXGBE_QUEUE_DEPLETED;
823                         break;
824                 }
825                 next = drbr_dequeue(ifp, txr->br);
826         }
827
828         if (enqueued > 0) {
829                 /* Set watchdog on */
830                 txr->queue_status |= IXGBE_QUEUE_WORKING;
831                 txr->watchdog_time = ticks;
832         }
833
834         if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
835                 ixgbe_txeof(txr);
836
837         return (err);
838 }
839
840 /*
841 ** Flush all ring buffers
842 */
843 static void
844 ixgbe_qflush(struct ifnet *ifp)
845 {
846         struct adapter  *adapter = ifp->if_softc;
847         struct tx_ring  *txr = adapter->tx_rings;
848         struct mbuf     *m;
849
850         for (int i = 0; i < adapter->num_queues; i++, txr++) {
851                 IXGBE_TX_LOCK(txr);
852                 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
853                         m_freem(m);
854                 IXGBE_TX_UNLOCK(txr);
855         }
856         if_qflush(ifp);
857 }
858 #endif /* __FreeBSD_version >= 800000 */
859
860 /*********************************************************************
861  *  Ioctl entry point
862  *
863  *  ixgbe_ioctl is called when the user wants to configure the
864  *  interface.
865  *
866  *  return 0 on success, positive on failure
867  **********************************************************************/
868
869 static int
870 ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data, struct ucred *cr)
871 {
872         struct adapter  *adapter = ifp->if_softc;
873         struct ifreq    *ifr = (struct ifreq *) data;
874 #if defined(INET) || defined(INET6)
875         struct ifaddr *ifa = (struct ifaddr *)data;
876         bool            avoid_reset = FALSE;
877 #endif
878         int             error = 0;
879
880         switch (command) {
881
882         case SIOCSIFADDR:
883 #ifdef INET
884                 if (ifa->ifa_addr->sa_family == AF_INET)
885                         avoid_reset = TRUE;
886 #endif
887 #ifdef INET6
888                 if (ifa->ifa_addr->sa_family == AF_INET6)
889                         avoid_reset = TRUE;
890 #endif
891 #if defined(INET) || defined(INET6)
892                 /*
893                 ** Calling init results in link renegotiation,
894                 ** so we avoid doing it when possible.
895                 */
896                 if (avoid_reset) {
897                         ifp->if_flags |= IFF_UP;
898                         if (!(ifp->if_flags & IFF_RUNNING))
899                                 ixgbe_init(adapter);
900                         if (!(ifp->if_flags & IFF_NOARP))
901                                 arp_ifinit(ifp, ifa);
902                 } else
903                         error = ether_ioctl(ifp, command, data);
904 #endif
905                 break;
906         case SIOCSIFMTU:
907                 IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
908                 if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
909                         error = EINVAL;
910                 } else {
911                         IXGBE_CORE_LOCK(adapter);
912                         ifp->if_mtu = ifr->ifr_mtu;
913                         adapter->max_frame_size =
914                                 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
915                         ixgbe_init_locked(adapter);
916                         IXGBE_CORE_UNLOCK(adapter);
917                 }
918                 break;
919         case SIOCSIFFLAGS:
920                 IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
921                 IXGBE_CORE_LOCK(adapter);
922                 if (ifp->if_flags & IFF_UP) {
923                         if ((ifp->if_flags & IFF_RUNNING)) {
924                                 if ((ifp->if_flags ^ adapter->if_flags) &
925                                     (IFF_PROMISC | IFF_ALLMULTI)) {
926                                         ixgbe_set_promisc(adapter);
927                                 }
928                         } else
929                                 ixgbe_init_locked(adapter);
930                 } else
931                         if (ifp->if_flags & IFF_RUNNING)
932                                 ixgbe_stop(adapter);
933                 adapter->if_flags = ifp->if_flags;
934                 IXGBE_CORE_UNLOCK(adapter);
935                 break;
936         case SIOCADDMULTI:
937         case SIOCDELMULTI:
938                 IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
939                 if (ifp->if_flags & IFF_RUNNING) {
940                         IXGBE_CORE_LOCK(adapter);
941                         ixgbe_disable_intr(adapter);
942                         ixgbe_set_multi(adapter);
943                         ixgbe_enable_intr(adapter);
944                         IXGBE_CORE_UNLOCK(adapter);
945                 }
946                 break;
947         case SIOCSIFMEDIA:
948         case SIOCGIFMEDIA:
949                 IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
950                 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
951                 break;
952         case SIOCSIFCAP:
953         {
954                 int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
955                 IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
956                 if (mask & IFCAP_HWCSUM)
957                         ifp->if_capenable ^= IFCAP_HWCSUM;
958                 if (mask & IFCAP_TSO4)
959                         ifp->if_capenable ^= IFCAP_TSO4;
960                 if (mask & IFCAP_TSO6)
961                         ifp->if_capenable ^= IFCAP_TSO6;
962 #if 0 /* NET_LRO */
963                 if (mask & IFCAP_LRO)
964                         ifp->if_capenable ^= IFCAP_LRO;
965 #endif
966                 if (mask & IFCAP_VLAN_HWTAGGING)
967                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
968                 if (mask & IFCAP_VLAN_HWFILTER)
969                         ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
970 #if 0 /* NET_TSO */
971                 if (mask & IFCAP_VLAN_HWTSO)
972                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
973 #endif
974                 if (ifp->if_flags & IFF_RUNNING) {
975                         IXGBE_CORE_LOCK(adapter);
976                         ixgbe_init_locked(adapter);
977                         IXGBE_CORE_UNLOCK(adapter);
978                 }
979 #if 0
980                 VLAN_CAPABILITIES(ifp);
981 #endif
982                 break;
983         }
984
985         default:
986                 IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
987                 error = ether_ioctl(ifp, command, data);
988                 break;
989         }
990
991         return (error);
992 }
993
994 /*********************************************************************
995  *  Init entry point
996  *
997  *  This routine is used in two ways. It is used by the stack as
998  *  init entry point in network interface structure. It is also used
999  *  by the driver as a hw/sw initialization routine to get to a
1000  *  consistent state.
1001  *
1002  *  return 0 on success, positive on failure
1003  **********************************************************************/
1004 #define IXGBE_MHADD_MFS_SHIFT 16
1005
/*
 * Bring the adapter to an operational state: reset the MAC, program
 * MAC address/offloads, set up TX/RX rings, interrupt routing and
 * flow control, then mark the interface IFF_RUNNING.
 *
 * Must be called with the core lock held (asserted below).
 */
static void
ixgbe_init_locked(struct adapter *adapter)
{
	struct ifnet   *ifp = adapter->ifp;
	device_t        dev = adapter->dev;
	struct ixgbe_hw *hw = &adapter->hw;
	u32             k, txdctl, mhadd, gpie;
	u32             rxdctl, rxctrl;

	KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);
	INIT_DEBUGOUT("ixgbe_init: begin");
	hw->adapter_stopped = FALSE;
	ixgbe_stop_adapter(hw);
	callout_stop(&adapter->timer);

	/* reprogram the RAR[0] in case user changed it. */
	ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
	      IXGBE_ETH_LENGTH_OF_ADDRESS);
	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
	hw->addr_ctrl.rar_used_count = 1;

	/* Set the various hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TSO)
		ifp->if_hwassist |= CSUM_TSO;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if 0
		if (hw->mac.type != ixgbe_mac_82598EB)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	/* Prepare transmit descriptors and buffers */
	if (ixgbe_setup_transmit_structures(adapter)) {
		device_printf(dev,"Could not setup transmit structures\n");
		ixgbe_stop(adapter);
		return;
	}

	ixgbe_init_hw(hw);
	ixgbe_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	ixgbe_set_multi(adapter);

	/*
	** Determine the correct mbuf pool
	** for doing jumbo/headersplit
	*/
#if 0 /* XXX */
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else if (adapter->max_frame_size <= 9216)
		adapter->rx_mbuf_sz = MJUM9BYTES;
	else
		adapter->rx_mbuf_sz = MJUM16BYTES;
#else
	/* Jumbo-cluster sizing above is disabled: always use MCLBYTES */
	adapter->rx_mbuf_sz = MCLBYTES;
#endif

	/* Prepare receive descriptors and buffers */
	if (ixgbe_setup_receive_structures(adapter)) {
		device_printf(dev,"Could not setup receive structures\n");
		ixgbe_stop(adapter);
		return;
	}

	/* Configure RX settings */
	ixgbe_initialize_receive_units(adapter);

	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);

	/* Enable Fan Failure Interrupt */
	gpie |= IXGBE_SDP1_GPIEN;

	/* Add for Module detection */
	if (hw->mac.type == ixgbe_mac_82599EB)
		gpie |= IXGBE_SDP2_GPIEN;

	/* Thermal Failure Detection */
	if (hw->mac.type == ixgbe_mac_X540)
		gpie |= IXGBE_SDP0_GPIEN;

	if (adapter->msix > 1) {
		/* Enable Enhanced MSIX mode */
		gpie |= IXGBE_GPIE_MSIX_MODE;
		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
		    IXGBE_GPIE_OCD;
	}
	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

	/* Set MTU size (only needed above the standard Ethernet MTU) */
	if (ifp->if_mtu > ETHERMTU) {
		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
		mhadd &= ~IXGBE_MHADD_MFS_MASK;
		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
	}
	
	/* Now enable all the queues */

	for (int i = 0; i < adapter->num_queues; i++) {
		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
		txdctl |= IXGBE_TXDCTL_ENABLE;
		/* Set WTHRESH to 8, burst writeback */
		txdctl |= (8 << 16);
		/*
		 * When the internal queue falls below PTHRESH (32),
		 * start prefetching as long as there are at least
		 * HTHRESH (1) buffers ready. The values are taken
		 * from the Intel linux driver 3.8.21.
		 * Prefetching enables tx line rate even with 1 queue.
		 */
		txdctl |= (32 << 0) | (1 << 8);
		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
	}

	for (int i = 0; i < adapter->num_queues; i++) {
		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
		if (hw->mac.type == ixgbe_mac_82598EB) {
			/*
			** PTHRESH = 21
			** HTHRESH = 4
			** WTHRESH = 8
			*/
			rxdctl &= ~0x3FFFFF;
			rxdctl |= 0x080420;
		}
		rxdctl |= IXGBE_RXDCTL_ENABLE;
		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
		/* Poll (up to ~10ms) for the queue-enable to take effect */
		for (k = 0; k < 10; k++) {
			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
			    IXGBE_RXDCTL_ENABLE)
				break;
			else
				msec_delay(1);
		}
		wmb();
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, we must preserve the buffers made
		 * available to userspace before the if_init()
		 * (this is true by default on the TX side, because
		 * init makes all buffers available to userspace).
		 *
		 * netmap_reset() and the device specific routines
		 * (e.g. ixgbe_setup_receive_rings()) map these
		 * buffers at the end of the NIC ring, so here we
		 * must set the RDT (tail) register to make sure
		 * they are not overwritten.
		 *
		 * In this driver the NIC ring starts at RDH = 0,
		 * RDT points to the last slot available for reception (?),
		 * so RDT = num_rx_desc - 1 means the whole ring is available.
		 */
		if (ifp->if_capenable & IFCAP_NETMAP) {
			struct netmap_adapter *na = NA(adapter->ifp);
			struct netmap_kring *kring = &na->rx_rings[i];
			int t = na->num_rx_desc - 1 - kring->nr_hwavail;

			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
		} else
#endif /* DEV_NETMAP */
		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
	}

	/* Set up VLAN support and filter */
	ixgbe_setup_vlan_hw_support(adapter);

	/* Enable Receive engine */
	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
	if (hw->mac.type == ixgbe_mac_82598EB)
		rxctrl |= IXGBE_RXCTRL_DMBYPS;
	rxctrl |= IXGBE_RXCTRL_RXEN;
	ixgbe_enable_rx_dma(hw, rxctrl);

	/* Restart the watchdog/stats timer */
	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);

	/* Set up MSI/X routing */
	if (ixgbe_enable_msix)	{
		ixgbe_configure_ivars(adapter);
		/* Set up auto-mask */
		if (hw->mac.type == ixgbe_mac_82598EB)
			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
		else {
			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
		}
	} else {  /* Simple settings for Legacy/MSI */
		ixgbe_set_ivar(adapter, 0, 0, 0);
		ixgbe_set_ivar(adapter, 0, 0, 1);
		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
	}

#ifdef IXGBE_FDIR
	/* Init Flow director */
	if (hw->mac.type != ixgbe_mac_82598EB) {
		u32 hdrm = 32 << fdir_pballoc;

		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
	}
#endif

	/*
	** Check on any SFP devices that
	** need to be kick-started
	*/
	if (hw->phy.type == ixgbe_phy_none) {
		int err = hw->phy.ops.identify(hw);
		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
			device_printf(dev,
			    "Unsupported SFP+ module type was detected.\n");
			return;
		}
	}

	/* Set moderation on the Link interrupt */
	ixgbe_set_eitr(adapter, adapter->linkvec, IXGBE_LINK_ITR);

	/* Config/Enable Link */
	ixgbe_config_link(adapter);

	/* Hardware Packet Buffer & Flow Control setup */
	{
		u32 rxpb, frame, size, tmp;

		frame = adapter->max_frame_size;

		/* Calculate High Water */
		if (hw->mac.type == ixgbe_mac_X540)
			tmp = IXGBE_DV_X540(frame, frame);
		else
			tmp = IXGBE_DV(frame, frame);
		size = IXGBE_BT2KB(tmp);
		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
		hw->fc.high_water[0] = rxpb - size;

		/* Now calculate Low Water */
		if (hw->mac.type == ixgbe_mac_X540)
			tmp = IXGBE_LOW_DV_X540(frame);
		else
			tmp = IXGBE_LOW_DV(frame);
		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
		
		adapter->fc = hw->fc.requested_mode = ixgbe_fc_full;
		hw->fc.pause_time = IXGBE_FC_PAUSE;
		hw->fc.send_xon = TRUE;
	}
	/* Initialize the FC settings */
	ixgbe_start_hw(hw);

	/* And now turn on interrupts */
	ixgbe_enable_intr(adapter);

	/* Now inform the stack we're ready */
	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	return;
}
1273
/*
 * Stack-facing init entry point: take the core lock and run the
 * real initialization in ixgbe_init_locked().
 */
static void
ixgbe_init(void *arg)
{
	struct adapter *sc = arg;

	IXGBE_CORE_LOCK(sc);
	ixgbe_init_locked(sc);
	IXGBE_CORE_UNLOCK(sc);
}
1284
1285
1286 /*
1287 **
1288 ** MSIX Interrupt Handlers and Tasklets
1289 **
1290 */
1291
1292 static inline void
1293 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1294 {
1295         struct ixgbe_hw *hw = &adapter->hw;
1296         u64     queue = (u64)(1 << vector);
1297         u32     mask;
1298
1299         if (hw->mac.type == ixgbe_mac_82598EB) {
1300                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1301                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1302         } else {
1303                 mask = (queue & 0xFFFFFFFF);
1304                 if (mask)
1305                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1306                 mask = (queue >> 32);
1307                 if (mask)
1308                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1309         }
1310 }
1311
1312 static inline void
1313 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1314 {
1315         struct ixgbe_hw *hw = &adapter->hw;
1316         u64     queue = (u64)(1 << vector);
1317         u32     mask;
1318
1319         if (hw->mac.type == ixgbe_mac_82598EB) {
1320                 mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1321                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1322         } else {
1323                 mask = (queue & 0xFFFFFFFF);
1324                 if (mask)
1325                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1326                 mask = (queue >> 32);
1327                 if (mask)
1328                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1329         }
1330 }
1331
1332 static inline void
1333 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
1334 {
1335         u32 mask;
1336
1337         if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
1338                 mask = (IXGBE_EIMS_RTX_QUEUE & queues);
1339                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
1340         } else {
1341                 mask = (queues & 0xFFFFFFFF);
1342                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
1343                 mask = (queues >> 32);
1344                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
1345         }
1346 }
1347
1348 /*********************************************************************
1349  *
1350  *  Legacy Interrupt Service routine
1351  *
1352  **********************************************************************/
1353
/*
 * Legacy (INTx/MSI) interrupt service routine.
 *
 * A single vector covers everything: RX cleanup, TX completion plus
 * a restart of the legacy transmit path, fan-failure detection and
 * link status changes.
 */
static void
ixgbe_legacy_irq(void *arg)
{
	struct ix_queue *que = arg;
	struct adapter	*adapter = que->adapter;
	struct ixgbe_hw	*hw = &adapter->hw;
	struct 		tx_ring *txr = adapter->tx_rings;
	u32		reg_eicr;


	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);

	++que->irqs;
	/* No cause bits set: not our interrupt; re-enable and leave */
	if (reg_eicr == 0) {
		ixgbe_enable_intr(adapter);
		return;
	}

	ixgbe_rxeof(que, adapter->rx_process_limit);

	/* Reclaim TX descriptors, then kick the legacy start path */
	IXGBE_TX_LOCK(txr);
	ixgbe_txeof(txr);
	if (!ifq_is_empty(&adapter->ifp->if_snd))
		ixgbe_start_locked(txr, adapter->ifp);
	IXGBE_TX_UNLOCK(txr);

	/* Check for fan failure */
	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
		device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
		    "REPLACE IMMEDIATELY!!\n");
		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
	}

	/* Link status change */
	if (reg_eicr & IXGBE_EICR_LSC) {
		ixgbe_check_link(&adapter->hw,
		    &adapter->link_speed, &adapter->link_up, 0);
		ixgbe_update_link_status(adapter);
	}

	/* Re-enable interrupts before returning */
	ixgbe_enable_intr(adapter);
}
1397
1398
1399 /*********************************************************************
1400  *
1401  *  MSIX Queue Interrupt Service routine
1402  *
1403  **********************************************************************/
/*
 * MSI-X per-queue interrupt service routine.
 *
 * The queue's vector is masked for the duration of processing and
 * re-enabled on the way out; RX is cleaned first, then TX completion
 * runs and the legacy start path is kicked if packets are pending.
 */
void
ixgbe_msix_que(void *arg)
{
	struct ix_queue *que = arg;
	struct adapter	*adapter = que->adapter;
	struct tx_ring	*txr = que->txr;

	/* Mask this vector while we service it */
	ixgbe_disable_queue(adapter, que->msix);
	++que->irqs;

	ixgbe_rxeof(que, adapter->rx_process_limit);

	IXGBE_TX_LOCK(txr);
	ixgbe_txeof(txr);
	if (!ifq_is_empty(&adapter->ifp->if_snd))
		ixgbe_start_locked(txr, adapter->ifp);
	IXGBE_TX_UNLOCK(txr);

	/* Reenable this interrupt */
	ixgbe_enable_queue(adapter, que->msix);
}
1425
1426
/*
 * MSI-X "other"/link interrupt service routine.
 *
 * Dispatches the non-queue causes: link status change, flow director
 * reinit (if compiled in), ECC errors, SFP module/MSF events, fan
 * failure and over-temperature.  Slow work is deferred to the
 * adapter taskqueue.
 */
static void
ixgbe_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	struct ixgbe_hw *hw = &adapter->hw;
	u32		reg_eicr;

	++adapter->link_irq;

	/* First get the cause */
	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
	/* Clear interrupt with write */
	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);

	/* Link status change */
	if (reg_eicr & IXGBE_EICR_LSC) {
		ixgbe_check_link(&adapter->hw,
		    &adapter->link_speed, &adapter->link_up, 0);
		ixgbe_update_link_status(adapter);
	}

	/* Causes below only exist on 82599/X540 class hardware */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
#ifdef IXGBE_FDIR
		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
			/* This is probably overkill :) */
			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
				return;
			/* Disable the interrupt */
			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
			taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
		} else
#endif
		if (reg_eicr & IXGBE_EICR_ECC) {
			device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
			    "Please Reboot!!\n");
			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
		} else

		/* SDP1/SDP2: SFP multi-speed fiber / module events */
		if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
			/* Clear the interrupt */
			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
			taskqueue_enqueue(adapter->tq, &adapter->msf_task);
		} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
			/* Clear the interrupt */
			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
			taskqueue_enqueue(adapter->tq, &adapter->mod_task);
		}
	} 

	/* Check for fan failure */
	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
		device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
		    "REPLACE IMMEDIATELY!!\n");
		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
	}

	/* Check for over temp condition */
	if ((hw->mac.type == ixgbe_mac_X540) &&
	    (reg_eicr & IXGBE_EICR_GPI_SDP0)) {
		device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
		    "PHY IS SHUT DOWN!!\n");
		device_printf(adapter->dev, "System shutdown required\n");
		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0);
	}

	/* Re-arm only the "other" cause vector */
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
	return;
}
1496
1497 /*********************************************************************
1498  *
1499  *  Media Ioctl callback
1500  *
1501  *  This routine is called whenever the user queries the status of
1502  *  the interface using ifconfig.
1503  *
1504  **********************************************************************/
1505 static void
1506 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1507 {
1508         struct adapter *adapter = ifp->if_softc;
1509
1510         ASSERT_IFNET_SERIALIZED_ALL(ifp);
1511
1512         INIT_DEBUGOUT("ixgbe_media_status: begin");
1513         ixgbe_update_link_status(adapter);
1514
1515         ifmr->ifm_status = IFM_AVALID;
1516         ifmr->ifm_active = IFM_ETHER;
1517
1518         if (!adapter->link_active)
1519                 return;
1520
1521         ifmr->ifm_status |= IFM_ACTIVE;
1522
1523         switch (adapter->link_speed) {
1524                 case IXGBE_LINK_SPEED_100_FULL:
1525                         ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1526                         break;
1527                 case IXGBE_LINK_SPEED_1GB_FULL:
1528                         ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
1529                         break;
1530                 case IXGBE_LINK_SPEED_10GB_FULL:
1531                         ifmr->ifm_active |= adapter->optics | IFM_FDX;
1532                         break;
1533         }
1534
1535         return;
1536 }
1537
1538 /*********************************************************************
1539  *
1540  *  Media Ioctl callback
1541  *
1542  *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt option with ifconfig.
1544  *
1545  **********************************************************************/
1546 static int
1547 ixgbe_media_change(struct ifnet * ifp)
1548 {
1549         struct adapter *adapter = ifp->if_softc;
1550         struct ifmedia *ifm = &adapter->media;
1551
1552         INIT_DEBUGOUT("ixgbe_media_change: begin");
1553
1554         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1555                 return (EINVAL);
1556
1557         switch (IFM_SUBTYPE(ifm->ifm_media)) {
1558         case IFM_AUTO:
1559                 adapter->hw.phy.autoneg_advertised =
1560                     IXGBE_LINK_SPEED_100_FULL |
1561                     IXGBE_LINK_SPEED_1GB_FULL |
1562                     IXGBE_LINK_SPEED_10GB_FULL;
1563                 break;
1564         default:
1565                 device_printf(adapter->dev, "Only auto media type\n");
1566                 return (EINVAL);
1567         }
1568
1569         return (0);
1570 }
1571
1572 /*********************************************************************
1573  *
1574  *  This routine maps the mbufs to tx descriptors, allowing the
1575  *  TX engine to transmit the packets. 
1576  *      - return 0 on success, positive on failure
1577  *
1578  **********************************************************************/
1579
1580 static int
1581 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1582 {
1583         struct adapter  *adapter = txr->adapter;
1584         u32             olinfo_status = 0, cmd_type_len;
1585         u32             paylen = 0;
1586         int             i, j, error, nsegs, maxsegs;
1587         int             first, last = 0;
1588         struct mbuf     *m_head;
1589         bus_dma_segment_t segs[adapter->num_segs];
1590         bus_dmamap_t    map;
1591         struct ixgbe_tx_buf *txbuf;
1592         union ixgbe_adv_tx_desc *txd = NULL;
1593
1594         m_head = *m_headp;
1595
1596         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1597                 error = ixgbe_tso_pullup(txr, m_headp);
1598                 if (error)
1599                         return error;
1600                 m_head = *m_headp;
1601         }
1602
1603         /* Basic descriptor defines */
1604         cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1605             IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1606
1607         if (m_head->m_flags & M_VLANTAG)
1608                 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1609
1610         /*
1611          * Important to capture the first descriptor
1612          * used because it will contain the index of
1613          * the one we tell the hardware to report back
1614          */
1615         first = txr->next_avail_desc;
1616         txbuf = &txr->tx_buffers[first];
1617         map = txbuf->map;
1618
1619         /*
1620          * Map the packet for DMA.
1621          */
1622         maxsegs = txr->tx_avail - IXGBE_TX_RESERVED;
1623         if (maxsegs > adapter->num_segs)
1624                 maxsegs = adapter->num_segs;
1625
1626         error = bus_dmamap_load_mbuf_defrag(txr->txtag, map, m_headp,
1627             segs, maxsegs, &nsegs, BUS_DMA_NOWAIT);
1628         if (error) {
1629                 if (error == ENOBUFS)
1630                         adapter->mbuf_defrag_failed++;
1631                 else
1632                         adapter->no_tx_dma_setup++;
1633
1634                 m_freem(*m_headp);
1635                 *m_headp = NULL;
1636                 return (error);
1637         }
1638
1639         /* Make certain there are enough descriptors */
1640         if (nsegs > txr->tx_avail - 2) {
1641                 txr->no_desc_avail++;
1642                 error = ENOBUFS;
1643                 goto xmit_fail;
1644         }
1645         m_head = *m_headp;
1646
1647         /*
1648         ** Set up the appropriate offload context
1649         ** this becomes the first descriptor of 
1650         ** a packet.
1651         */
1652         if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1653                 if (ixgbe_tso_setup(txr, m_head, &paylen, &olinfo_status)) {
1654                         cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
1655                         olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1656                         olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
1657                         ++adapter->tso_tx;
1658                 } else
1659                         return (ENXIO);
1660         } else if (ixgbe_tx_ctx_setup(txr, m_head))
1661                 olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
1662
1663 #ifdef IXGBE_IEEE1588
1664         /* This is changing soon to an mtag detection */
1665         if (we detect this mbuf has a TSTAMP mtag)
1666                 cmd_type_len |= IXGBE_ADVTXD_MAC_TSTAMP;
1667 #endif
1668
1669 #ifdef IXGBE_FDIR
1670         /* Do the flow director magic */
1671         if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1672                 ++txr->atr_count;
1673                 if (txr->atr_count >= atr_sample_rate) {
1674                         ixgbe_atr(txr, m_head);
1675                         txr->atr_count = 0;
1676                 }
1677         }
1678 #endif
1679         /* Record payload length */
1680         if (paylen == 0)
1681                 olinfo_status |= m_head->m_pkthdr.len <<
1682                     IXGBE_ADVTXD_PAYLEN_SHIFT;
1683
1684         i = txr->next_avail_desc;
1685         for (j = 0; j < nsegs; j++) {
1686                 bus_size_t seglen;
1687                 bus_addr_t segaddr;
1688
1689                 txbuf = &txr->tx_buffers[i];
1690                 txd = &txr->tx_base[i];
1691                 seglen = segs[j].ds_len;
1692                 segaddr = htole64(segs[j].ds_addr);
1693
1694                 txd->read.buffer_addr = segaddr;
1695                 txd->read.cmd_type_len = htole32(txr->txd_cmd |
1696                     cmd_type_len |seglen);
1697                 txd->read.olinfo_status = htole32(olinfo_status);
1698                 last = i; /* descriptor that will get completion IRQ */
1699
1700                 if (++i == adapter->num_tx_desc)
1701                         i = 0;
1702
1703                 txbuf->m_head = NULL;
1704                 txbuf->eop_index = -1;
1705         }
1706
1707         txd->read.cmd_type_len |=
1708             htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1709         txr->tx_avail -= nsegs;
1710         txr->next_avail_desc = i;
1711
1712         txbuf->m_head = m_head;
1713         /* Swap the dma map between the first and last descriptor */
1714         txr->tx_buffers[first].map = txbuf->map;
1715         txbuf->map = map;
1716         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1717
1718         /* Set the index of the descriptor that will be marked done */
1719         txbuf = &txr->tx_buffers[first];
1720         txbuf->eop_index = last;
1721
1722         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1723             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1724         /*
1725          * Advance the Transmit Descriptor Tail (Tdt), this tells the
1726          * hardware that this frame is available to transmit.
1727          */
1728         ++txr->total_packets;
1729         IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1730
1731         return (0);
1732
1733 xmit_fail:
1734         bus_dmamap_unload(txr->txtag, txbuf->map);
1735         return (error);
1736
1737 }
1738
1739 static void
1740 ixgbe_set_promisc(struct adapter *adapter)
1741 {
1742         u_int32_t       reg_rctl;
1743         struct ifnet   *ifp = adapter->ifp;
1744
1745         reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1746         reg_rctl &= (~IXGBE_FCTRL_UPE);
1747         reg_rctl &= (~IXGBE_FCTRL_MPE);
1748         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1749
1750         if (ifp->if_flags & IFF_PROMISC) {
1751                 reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1752                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1753         } else if (ifp->if_flags & IFF_ALLMULTI) {
1754                 reg_rctl |= IXGBE_FCTRL_MPE;
1755                 reg_rctl &= ~IXGBE_FCTRL_UPE;
1756                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1757         }
1758         return;
1759 }
1760
1761
1762 /*********************************************************************
1763  *  Multicast Update
1764  *
1765  *  This routine is called whenever multicast address list is updated.
1766  *
1767  **********************************************************************/
1768 #define IXGBE_RAR_ENTRIES 16
1769
1770 static void
1771 ixgbe_set_multi(struct adapter *adapter)
1772 {
1773         u32     fctrl;
1774         u8      *mta;
1775         u8      *update_ptr;
1776         struct  ifmultiaddr *ifma;
1777         int     mcnt = 0;
1778         struct ifnet   *ifp = adapter->ifp;
1779
1780         IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1781
1782         mta = adapter->mta;
1783         bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1784             MAX_NUM_MULTICAST_ADDRESSES);
1785
1786         fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1787         fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1788         if (ifp->if_flags & IFF_PROMISC)
1789                 fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1790         else if (ifp->if_flags & IFF_ALLMULTI) {
1791                 fctrl |= IXGBE_FCTRL_MPE;
1792                 fctrl &= ~IXGBE_FCTRL_UPE;
1793         } else
1794                 fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1795         
1796         IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
1797
1798         TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1799                 if (ifma->ifma_addr->sa_family != AF_LINK)
1800                         continue;
1801                 bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1802                     &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1803                     IXGBE_ETH_LENGTH_OF_ADDRESS);
1804                 mcnt++;
1805         }
1806
1807         update_ptr = mta;
1808         ixgbe_update_mc_addr_list(&adapter->hw,
1809             update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
1810
1811         return;
1812 }
1813
1814 /*
1815  * This is an iterator function now needed by the multicast
1816  * shared code. It simply feeds the shared code routine the
1817  * addresses in the array of ixgbe_set_multi() one by one.
1818  */
1819 static u8 *
1820 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
1821 {
1822         u8 *addr = *update_ptr;
1823         u8 *newptr;
1824         *vmdq = 0;
1825
1826         newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
1827         *update_ptr = newptr;
1828         return addr;
1829 }
1830
1831
1832 /*********************************************************************
1833  *  Timer routine
1834  *
1835  *  This routine checks for link status,updates statistics,
1836  *  and runs the watchdog check.
1837  *
1838  **********************************************************************/
1839
1840 static void
1841 ixgbe_local_timer(void *arg)
1842 {
1843         struct adapter  *adapter = arg;
1844         device_t        dev = adapter->dev;
1845         struct ifnet    *ifp = adapter->ifp;
1846         struct ix_queue *que = adapter->queues;
1847         struct tx_ring  *txr = adapter->tx_rings;
1848         int             hung, busy, paused;
1849
1850         IXGBE_CORE_LOCK(adapter);
1851         hung = busy = paused = 0;
1852
1853         /* Check for pluggable optics */
1854         if (adapter->sfp_probe)
1855                 if (!ixgbe_sfp_probe(adapter))
1856                         goto out; /* Nothing to do */
1857
1858         ixgbe_update_link_status(adapter);
1859         ixgbe_update_stats_counters(adapter);
1860
1861         /*
1862          * If the interface has been paused
1863          * then don't do the watchdog check
1864          */
1865         if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
1866                 paused = 1;
1867
1868         /*
1869         ** Check the TX queues status
1870         **      - central locked handling of OACTIVE
1871         **      - watchdog only if all queues show hung
1872         */          
1873         for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
1874                 if ((txr->queue_status & IXGBE_QUEUE_HUNG) &&
1875                     (paused == 0))
1876                         ++hung;
1877                 if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
1878                         ++busy;
1879         }
1880         /* Only truely watchdog if all queues show hung */
1881         if (hung == adapter->num_queues)
1882                 goto watchdog;
1883         /* Only turn off the stack flow when ALL are depleted */
1884         if (busy == adapter->num_queues)
1885                 ifp->if_flags |= IFF_OACTIVE;
1886         else if ((ifp->if_flags & IFF_OACTIVE) &&
1887             (busy < adapter->num_queues))
1888                 ifp->if_flags &= ~IFF_OACTIVE;
1889
1890 out:
1891         ixgbe_rearm_queues(adapter, adapter->que_mask);
1892         callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1893         IXGBE_CORE_UNLOCK(adapter);
1894         return;
1895
1896 watchdog:
1897         device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1898         device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1899             IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
1900             IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
1901         device_printf(dev,"TX(%d) desc avail = %d,"
1902             "Next TX to Clean = %d\n",
1903             txr->me, txr->tx_avail, txr->next_to_clean);
1904         adapter->ifp->if_flags &= ~IFF_RUNNING;
1905         adapter->watchdog_events++;
1906         ixgbe_init_locked(adapter);
1907
1908         IXGBE_CORE_UNLOCK(adapter);
1909 }
1910
1911 /*
1912 ** Note: this routine updates the OS on the link state
1913 **      the real check of the hardware only happens with
1914 **      a link interrupt.
1915 */
1916 static void
1917 ixgbe_update_link_status(struct adapter *adapter)
1918 {
1919         struct ifnet    *ifp = adapter->ifp;
1920         struct tx_ring *txr = adapter->tx_rings;
1921         device_t dev = adapter->dev;
1922
1923
1924         if (adapter->link_up){ 
1925                 if (adapter->link_active == FALSE) {
1926                         if (bootverbose)
1927                                 device_printf(dev,"Link is up %d Gbps %s \n",
1928                                     ((adapter->link_speed == 128)? 10:1),
1929                                     "Full Duplex");
1930                         adapter->link_active = TRUE;
1931                         /* Update any Flow Control changes */
1932                         ixgbe_fc_enable(&adapter->hw);
1933                         ifp->if_link_state = LINK_STATE_UP;
1934                         if_link_state_change(ifp);
1935                 }
1936         } else { /* Link down */
1937                 if (adapter->link_active == TRUE) {
1938                         if (bootverbose)
1939                                 device_printf(dev,"Link is Down\n");
1940                         ifp->if_link_state = LINK_STATE_DOWN;
1941                         if_link_state_change(ifp);
1942                         adapter->link_active = FALSE;
1943                         for (int i = 0; i < adapter->num_queues;
1944                             i++, txr++)
1945                                 txr->queue_status = IXGBE_QUEUE_IDLE;
1946                 }
1947         }
1948
1949         return;
1950 }
1951
1952
1953 /*********************************************************************
1954  *
1955  *  This routine disables all traffic on the adapter by issuing a
1956  *  global reset on the MAC and deallocates TX/RX buffers.
1957  *
1958  **********************************************************************/
1959
/*
 * Stop all traffic: disable interrupts, stop the timer, reset the MAC
 * and mark the interface down.  Caller must hold the core lock.
 */
static void
ixgbe_stop(void *arg)
{
        struct ifnet   *ifp;
        struct adapter *adapter = arg;
        struct ixgbe_hw *hw = &adapter->hw;
        ifp = adapter->ifp;

        KKASSERT(lockstatus(&adapter->core_lock, curthread) != 0);

        INIT_DEBUGOUT("ixgbe_stop: begin\n");
        ixgbe_disable_intr(adapter);
        callout_stop(&adapter->timer);

        /* Let the stack know...*/
        ifp->if_flags &= ~IFF_RUNNING;
        ifp->if_flags |= IFF_OACTIVE;

        ixgbe_reset_hw(hw);
        /*
         * NOTE(review): adapter_stopped is cleared before the stop call;
         * presumably ixgbe_stop_adapter() short-circuits when it is set,
         * so this forces a full stop after the reset — confirm against
         * the shared code.
         */
        hw->adapter_stopped = FALSE;
        ixgbe_stop_adapter(hw);
        /* Turn off the laser */
        if (hw->phy.multispeed_fiber)
                ixgbe_disable_tx_laser(hw);

        /* reprogram the RAR[0] in case user changed it. */
        ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);

        return;
}
1990
1991
1992 /*********************************************************************
1993  *
1994  *  Determine hardware revision.
1995  *
1996  **********************************************************************/
1997 static void
1998 ixgbe_identify_hardware(struct adapter *adapter)
1999 {
2000         device_t        dev = adapter->dev;
2001         struct ixgbe_hw *hw = &adapter->hw;
2002
2003         /* Save off the information about this board */
2004         hw->vendor_id = pci_get_vendor(dev);
2005         hw->device_id = pci_get_device(dev);
2006         hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2007         hw->subsystem_vendor_id =
2008             pci_read_config(dev, PCIR_SUBVEND_0, 2);
2009         hw->subsystem_device_id =
2010             pci_read_config(dev, PCIR_SUBDEV_0, 2);
2011
2012         /* We need this here to set the num_segs below */
2013         ixgbe_set_mac_type(hw);
2014
2015         /* Pick up the 82599 and VF settings */
2016         if (hw->mac.type != ixgbe_mac_82598EB) {
2017                 hw->phy.smart_speed = ixgbe_smart_speed;
2018                 adapter->num_segs = IXGBE_82599_SCATTER;
2019         } else
2020                 adapter->num_segs = IXGBE_82598_SCATTER;
2021
2022         return;
2023 }
2024
2025 /*********************************************************************
2026  *
2027  *  Determine optic type
2028  *
2029  **********************************************************************/
2030 static void
2031 ixgbe_setup_optics(struct adapter *adapter)
2032 {
2033         struct ixgbe_hw *hw = &adapter->hw;
2034         int             layer;
2035         
2036         layer = ixgbe_get_supported_physical_layer(hw);
2037
2038         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2039                 adapter->optics = IFM_10G_T;
2040                 return;
2041         }
2042
2043         if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2044                 adapter->optics = IFM_1000_T;
2045                 return;
2046         }
2047
2048         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2049             IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2050                 adapter->optics = IFM_10G_LR;
2051                 return;
2052         }
2053
2054         if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2055                 adapter->optics = IFM_10G_SR;
2056                 return;
2057         }
2058
2059         if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2060                 adapter->optics = IFM_10G_TWINAX;
2061                 return;
2062         }
2063
2064         if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2065             IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2066                 adapter->optics = IFM_10G_CX4;
2067                 return;
2068         }
2069
2070         /* If we get here just set the default */
2071         adapter->optics = IFM_ETHER | IFM_AUTO;
2072         return;
2073 }
2074
2075 /*********************************************************************
2076  *
2077  *  Setup the Legacy or MSI Interrupt handler
2078  *
2079  **********************************************************************/
/*
 * Allocate and wire up a single legacy or MSI interrupt.
 * Returns 0 on success, ENXIO/errno on failure.
 */
static int
ixgbe_allocate_legacy(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        struct          ix_queue *que = adapter->queues;
        int error, rid = 0;
        unsigned int intr_flags;

        /* MSI RID at 1 */
        if (adapter->msix == 1)
                rid = 1;

        /* Try allocating a MSI interrupt first */
        adapter->intr_type = pci_alloc_1intr(dev, ixgbe_msi_enable,
                &rid, &intr_flags);

        /* We allocate a single interrupt resource */
        adapter->res = bus_alloc_resource_any(dev,
            SYS_RES_IRQ, &rid, intr_flags);
        if (adapter->res == NULL) {
                device_printf(dev, "Unable to allocate bus resource: "
                    "interrupt\n");
                return (ENXIO);
        }

        /* Tasklets for Link, SFP and Multispeed Fiber */
        TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
        TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
#ifdef IXGBE_FDIR
        TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
#endif

        /* Hook the single handler; it services all queues and link. */
        if ((error = bus_setup_intr(dev, adapter->res, INTR_MPSAFE,
            ixgbe_legacy_irq, que, &adapter->tag, &adapter->serializer)) != 0) {
                device_printf(dev, "Failed to register fast interrupt "
                    "handler: %d\n", error);
                /*
                 * NOTE(review): adapter->tq is never created anywhere in
                 * this function — this free looks like a leftover from
                 * the FreeBSD taskqueue code; confirm tq is non-NULL
                 * here or remove the call.
                 */
                taskqueue_free(adapter->tq);
                adapter->tq = NULL;
                return (error);
        }
        /* For simplicity in the handlers */
        adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;

        return (0);
}
2125
2126
2127 /*********************************************************************
2128  *
2129  *  Setup MSIX Interrupt resources and handlers 
2130  *
2131  **********************************************************************/
/*
 * Allocate one MSI-X vector per queue (bound to the matching CPU) plus
 * one extra vector, bound to CPU 0, for link events.
 * Returns 0 on success, errno on failure.
 */
static int
ixgbe_allocate_msix(struct adapter *adapter)
{
        device_t        dev = adapter->dev;
        struct          ix_queue *que = adapter->queues;
        int             error, rid, vector = 0;
        char            desc[16];

        error = pci_setup_msix(dev);
        if (error) {
                device_printf(dev, "MSI-X setup failed\n");
                return (error);
        }

        /* One vector (RX/TX pair) per queue. */
        for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
                rid = vector + 1;

                /*
                ** Bind the msix vector, and thus the
                ** ring to the corresponding cpu.
                */
                error = pci_alloc_msix_vector(dev, vector, &rid, i);
                if (error) {
                        device_printf(dev, "pci_alloc_msix_vector failed\n");
                        return (error);
                }

                que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
                    RF_SHAREABLE | RF_ACTIVE);
                if (que->res == NULL) {
                        device_printf(dev,"Unable to allocate"
                            " bus resource: que interrupt [%d]\n", vector);
                        return (ENXIO);
                }
                /* Set the handler function */
                ksnprintf(desc, sizeof(desc), "%s que %d",
                    device_get_nameunit(dev), i);
                error = bus_setup_intr_descr(dev, que->res, INTR_MPSAFE,
                    ixgbe_msix_que, que, &que->tag, &que->serializer, desc);
                if (error) {
                        que->res = NULL;
                        device_printf(dev, "Failed to register QUE handler");
                        return (error);
                }
                que->msix = vector;
                /* Track which EIMS bits belong to queue interrupts. */
                adapter->que_mask |= (u64)(1 << que->msix);
        }

        /* and Link, bind vector to cpu #0 */
        rid = vector + 1;
        error = pci_alloc_msix_vector(dev, vector, &rid, 0);
        if (error) {
                device_printf(dev, "pci_alloc_msix_vector failed\n");
                return (error);
        }
        adapter->res = bus_alloc_resource_any(dev,
            SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
        if (!adapter->res) {
                device_printf(dev,"Unable to allocate"
            " bus resource: Link interrupt [%d]\n", rid);
                return (ENXIO);
        }
        /* Set the link handler function */
        error = bus_setup_intr_descr(dev, adapter->res, INTR_MPSAFE,
            ixgbe_msix_link, adapter, &adapter->tag, &adapter->serializer,
            "link");
        if (error) {
                adapter->res = NULL;
                device_printf(dev, "Failed to register LINK handler");
                return (error);
        }
        /* All vectors wired up; turn MSI-X on in hardware. */
        pci_enable_msix(dev);

        adapter->linkvec = vector;
        /* Tasklets for Link, SFP and Multispeed Fiber */
        TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
        TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
#ifdef IXGBE_FDIR
        TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
#endif

        return (0);
}
2215
2216 /*
2217  * Setup Either MSI/X or MSI
2218  */
/*
 * Probe for MSI-X support and decide the queue count.
 * Returns the number of usable vectors: >1 for MSI-X, otherwise the
 * MSI message count (the caller falls back to legacy/MSI setup).
 */
static int
ixgbe_setup_msix(struct adapter *adapter)
{
        device_t dev = adapter->dev;
        int rid, want, queues, msgs;

        /* Override by tuneable */
        if (ixgbe_enable_msix == 0)
                goto msi;

        /* First try MSI/X */
        rid = PCIR_BAR(MSIX_82598_BAR);
        adapter->msix_mem = bus_alloc_resource_any(dev,
            SYS_RES_MEMORY, &rid, RF_ACTIVE);
        if (!adapter->msix_mem) {
                rid += 4;       /* 82599 maps in higher BAR */
                adapter->msix_mem = bus_alloc_resource_any(dev,
                    SYS_RES_MEMORY, &rid, RF_ACTIVE);
        }
        if (!adapter->msix_mem) {
                /* May not be enabled */
                device_printf(adapter->dev,
                    "Unable to map MSIX table \n");
                goto msi;
        }

        msgs = pci_msix_count(dev); 
        if (msgs == 0) { /* system has msix disabled */
                bus_release_resource(dev, SYS_RES_MEMORY,
                    rid, adapter->msix_mem);
                adapter->msix_mem = NULL;
                goto msi;
        }

        /* Figure out a reasonable auto config value */
        queues = (ncpus > (msgs-1)) ? (msgs-1) : ncpus;

        /* The tuneable, when set, overrides the auto value. */
        if (ixgbe_num_queues != 0)
                queues = ixgbe_num_queues;
        /* Set max queues to 8 when autoconfiguring */
        else if ((ixgbe_num_queues == 0) && (queues > 8))
                queues = 8;

        /*
        ** Want one vector (RX/TX pair) per queue
        ** plus an additional for Link.
        */
        want = queues + 1;
        if (msgs >= want)
                msgs = want;
        else {
                device_printf(adapter->dev,
                    "MSIX Configuration Problem, "
                    "%d vectors but %d queues wanted!\n",
                    msgs, want);
                return (0); /* Will go to Legacy setup */
        }
        if (msgs) {
                device_printf(adapter->dev,
                    "Using MSIX interrupts with %d vectors\n", msgs);
                adapter->num_queues = queues;
                return (msgs);
        }
msi:
        /* MSI-X unavailable/disabled: report plain MSI message count. */
        msgs = pci_msi_count(dev);
        return (msgs);
}
2286
2287
2288 static int
2289 ixgbe_allocate_pci_resources(struct adapter *adapter)
2290 {
2291         int             rid;
2292         device_t        dev = adapter->dev;
2293
2294         rid = PCIR_BAR(0);
2295         adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2296             &rid, RF_ACTIVE);
2297
2298         if (!(adapter->pci_mem)) {
2299                 device_printf(dev,"Unable to allocate bus resource: memory\n");
2300                 return (ENXIO);
2301         }
2302
2303         adapter->osdep.mem_bus_space_tag =
2304                 rman_get_bustag(adapter->pci_mem);
2305         adapter->osdep.mem_bus_space_handle =
2306                 rman_get_bushandle(adapter->pci_mem);
2307         adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2308
2309         /* Legacy defaults */
2310         adapter->num_queues = 1;
2311         adapter->hw.back = &adapter->osdep;
2312
2313         /*
2314         ** Now setup MSI or MSI/X, should
2315         ** return us the number of supported
2316         ** vectors. (Will be 1 for MSI)
2317         */
2318         adapter->msix = ixgbe_setup_msix(adapter);
2319         return (0);
2320 }
2321
/*
 * Tear down interrupt handlers and release all PCI bus resources.
 * Safe to call from a partially-failed attach (guarded by adapter->res).
 */
static void
ixgbe_free_pci_resources(struct adapter * adapter)
{
        struct          ix_queue *que = adapter->queues;
        device_t        dev = adapter->dev;
        int             rid, memrid;

        if (adapter->hw.mac.type == ixgbe_mac_82598EB)
                memrid = PCIR_BAR(MSIX_82598_BAR);
        else
                memrid = PCIR_BAR(MSIX_82599_BAR);

        /*
        ** There is a slight possibility of a failure mode
        ** in attach that will result in entering this function
        ** before interrupt resources have been initialized, and
        ** in that case we do not want to execute the loops below
        ** We can detect this reliably by the state of the adapter
        ** res pointer.
        */
        if (adapter->res == NULL)
                goto mem;

        /*
        **  Release all msix queue resources:
        */
        for (int i = 0; i < adapter->num_queues; i++, que++) {
                rid = que->msix + 1;
                if (que->tag != NULL) {
                        bus_teardown_intr(dev, que->res, que->tag);
                        que->tag = NULL;
                }
                if (que->res != NULL)
                        bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
        }


        /* Clean the Legacy or Link interrupt last */
        if (adapter->linkvec) /* we are doing MSIX */
                rid = adapter->linkvec + 1;
        else
                (adapter->msix != 0) ? (rid = 1):(rid = 0);

        if (adapter->tag != NULL) {
                bus_teardown_intr(dev, adapter->res, adapter->tag);
                adapter->tag = NULL;
        }
        if (adapter->res != NULL)
                bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
        if (adapter->intr_type == PCI_INTR_TYPE_MSI)
                pci_release_msi(adapter->dev);

mem:
        /*
         * NOTE(review): for plain MSI, intr_type == PCI_INTR_TYPE_MSI
         * and adapter->msix may be 1, which would make pci_release_msi
         * run twice (above and here) — confirm that is harmless.
         */
        if (adapter->msix)
                pci_release_msi(dev);

        if (adapter->msix_mem != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    memrid, adapter->msix_mem);

        if (adapter->pci_mem != NULL)
                bus_release_resource(dev, SYS_RES_MEMORY,
                    PCIR_BAR(0), adapter->pci_mem);

        return;
}
2388
2389 /*********************************************************************
2390  *
2391  *  Setup networking device structure and register an interface.
2392  *
2393  **********************************************************************/
static int
ixgbe_setup_interface(device_t dev, struct adapter *adapter)
{
        struct ixgbe_hw *hw = &adapter->hw;
        struct ifnet   *ifp;

        INIT_DEBUGOUT("ixgbe_setup_interface: begin");

        ifp = adapter->ifp = if_alloc(IFT_ETHER);
        if (ifp == NULL) {
                device_printf(dev, "can not allocate ifnet structure\n");
                return (-1);
        }
        if_initname(ifp, device_get_name(dev), device_get_unit(dev));
        ifp->if_baudrate = 1000000000;
        ifp->if_init = ixgbe_init;
        ifp->if_softc = adapter;
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_ioctl = ixgbe_ioctl;
        ifp->if_start = ixgbe_start;
#if 0 /* __FreeBSD_version >= 800000 */
        ifp->if_transmit = ixgbe_mq_start;
        ifp->if_qflush = ixgbe_qflush;
#endif
        /* Leave two descriptors of headroom in the send queue. */
        ifp->if_snd.ifq_maxlen = adapter->num_tx_desc - 2;

        ether_ifattach(ifp, adapter->hw.mac.addr, NULL);

        adapter->max_frame_size =
            ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;

        /*
         * Tell the upper layer(s) we support long frames.
         */
        ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);

        ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
        ifp->if_capabilities |= IFCAP_JUMBO_MTU;
        ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
#if 0 /* NET_TSO */
                             |  IFCAP_VLAN_HWTSO
#endif
                             |  IFCAP_VLAN_MTU;
        /* Everything advertised is enabled by default. */
        ifp->if_capenable = ifp->if_capabilities;

        /* Don't enable LRO by default */
#if 0 /* NET_LRO */
        ifp->if_capabilities |= IFCAP_LRO;
#endif

        /*
        ** Don't turn this on by default, if vlans are
        ** created on another pseudo device (eg. lagg)
        ** then vlan events are not passed thru, breaking
        ** operation, but with HW FILTER off it works. If
        ** using vlans directly on the ixgbe driver you can
        ** enable this and get full hardware tag filtering.
        */
        ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

        /*
         * Specify the media types supported by this adapter and register
         * callbacks to update media and link information
         */
        ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
                     ixgbe_media_status);
        ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
        ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
        /* 82598AT also supports forced 1G copper modes. */
        if (hw->device_id == IXGBE_DEV_ID_82598AT) {
                ifmedia_add(&adapter->media,
                    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
                ifmedia_add(&adapter->media,
                    IFM_ETHER | IFM_1000_T, 0, NULL);
        }
        /* Autoselect is always available and is the default. */
        ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
        ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);

        return (0);
}
2473
/*
 * Kick off link establishment.  SFP modules are handled asynchronously
 * via the mod/msf taskqueues; other PHYs go through check_link/setup_link
 * directly.
 */
static void
ixgbe_config_link(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	u32	autoneg, err = 0;
	bool	sfp, negotiate;

	sfp = ixgbe_is_sfp(hw);

	if (sfp) { 
		if (hw->phy.multispeed_fiber) {
			/* Multispeed fiber: set up the module, enable the
			 * laser, then let the multispeed-fiber task finish. */
			hw->mac.ops.setup_sfp(hw);
			ixgbe_enable_tx_laser(hw);
			taskqueue_enqueue(adapter->tq, &adapter->msf_task);
		} else
			/* Plain SFP: defer to the module insertion task. */
			taskqueue_enqueue(adapter->tq, &adapter->mod_task);
	} else {
		if (hw->mac.ops.check_link)
			err = ixgbe_check_link(hw, &autoneg,
			    &adapter->link_up, FALSE);
		if (err)
			goto out;
		autoneg = hw->phy.autoneg_advertised;
		/* NOTE(review): "negotiate" is only written by
		 * get_link_capabilities(), which is skipped when
		 * autoneg_advertised is non-zero — setup_link() may then
		 * see an uninitialized value; confirm against the hw layer. */
		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
			err  = hw->mac.ops.get_link_capabilities(hw,
			    &autoneg, &negotiate);
		if (err)
			goto out;
		if (hw->mac.ops.setup_link)
			err = hw->mac.ops.setup_link(hw, autoneg,
			    negotiate, adapter->link_up);
	}
out:
	return;
}
2509
2510 /********************************************************************
2511  * Manage DMA'able memory.
2512  *******************************************************************/
2513 static void
2514 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2515 {
2516         if (error)
2517                 return;
2518         *(bus_addr_t *) arg = segs->ds_addr;
2519         return;
2520 }
2521
/*
 * Allocate a single contiguous, DBA_ALIGN-aligned DMA area of "size"
 * bytes and load it into dma->{dma_tag,dma_map,dma_vaddr,dma_paddr}.
 * Returns 0 on success or the bus_dma error code; on failure the dma
 * struct's tag/map pointers are reset to NULL.
 */
static int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
		struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int		r;

	r = bus_dma_tag_create(NULL,	/* parent */
			       DBA_ALIGN, 0,	/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,	/* filter, filterarg */
			       size,	/* maxsize */
			       1,	/* nsegments */
			       size,	/* maxsegsize */
			       BUS_DMA_ALLOCNOW,	/* flags */
			       &dma->dma_tag);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
		       "error %u\n", r);
		goto fail_0;
	}
	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
			     BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
		       "error %u\n", r);
		goto fail_1;
	}
	/* ixgbe_dmamap_cb() records the physical address in dma_paddr. */
	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
			    size,
			    ixgbe_dmamap_cb,
			    &dma->dma_paddr,
			    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
		       "error %u\n", r);
		goto fail_2;
	}
	dma->dma_size = size;
	return (0);
	/* Unwind in reverse order of acquisition. */
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_map = NULL;
	dma->dma_tag = NULL;
	return (r);
}
2572
/*
 * Tear down a DMA area created by ixgbe_dma_malloc(): sync for CPU
 * visibility, then unload, free and destroy in reverse-allocation order.
 */
static void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}
2582
2583
2584 /*********************************************************************
2585  *
2586  *  Allocate memory for the transmit and receive rings, and then
2587  *  the descriptors associated with each, called only once at attach.
2588  *
2589  **********************************************************************/
static int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int rsize, tsize, error = IXGBE_SUCCESS;
	/* txconf/rxconf count rings fully set up, for partial unwind. */
	int txconf = 0, rxconf = 0;

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) kmalloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* First allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) kmalloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Next allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) kmalloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */ 
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->me = i;

		/* Initialize the TX side lock */
		IXGBE_TX_LOCK_INIT(txr);

		/* Descriptor ring DMA memory. */
		if (ixgbe_dma_malloc(adapter, tsize,
			&txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#if 0 /* __FreeBSD_version >= 800000 */
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
	}

	/*
	 * Next the RX queues...
	 */ 
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
		rxr->me = i;

		/* Initialize the RX side lock */
		ksnprintf(rxr->lock_name, sizeof(rxr->lock_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		lockinit(&rxr->rx_lock, rxr->lock_name, 0, LK_CANRECURSE);

		if (ixgbe_dma_malloc(adapter, rsize,
			&rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RxDescriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring*/
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

	/* Unwind only the rings that completed, using the conf counters. */
err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	kfree(adapter->rx_rings, M_DEVBUF);
rx_fail:
	kfree(adapter->tx_rings, M_DEVBUF);
tx_fail:
	kfree(adapter->queues, M_DEVBUF);
fail:
	return (error);
}
2736
2737 /*********************************************************************
2738  *
2739  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2740  *  the information needed to transmit a packet on the wire. This is
2741  *  called only once at attach, setup is done every reset.
2742  *
2743  **********************************************************************/
static int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_tx_buf *txbuf;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(
			       NULL,	/* parent */
			       1, 0,		/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       IXGBE_TSO_SIZE,		/* maxsize */
			       adapter->num_segs,	/* nsegments */
			       PAGE_SIZE,		/* maxsegsize */
			       0,			/* flags */
			       &txr->txtag))) {
		device_printf(dev,"Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* One ixgbe_tx_buf per descriptor, zeroed. */
	if (!(txr->tx_buffers =
	    (struct ixgbe_tx_buf *) kmalloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return 0;
fail:
	/* We free all, it handles case where we are in the middle */
	ixgbe_free_transmit_structures(adapter);
	return (error);
}
2794
2795 /*********************************************************************
2796  *
2797  *  Initialize a transmit ring.
2798  *
2799  **********************************************************************/
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
	int i;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	IXGBE_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */
	bzero((void *)txr->tx_base,
	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP index */
		txbuf->eop_index = -1;
	}

#ifdef IXGBE_FDIR
	/* Set the rate at which we sample packets */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
		txr->atr_sample = atr_sample_rate;
#endif

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	/* Push the cleared ring out to the device before use. */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
}
2869
2870 /*********************************************************************
2871  *
2872  *  Initialize all transmit rings.
2873  *
2874  **********************************************************************/
2875 static int
2876 ixgbe_setup_transmit_structures(struct adapter *adapter)
2877 {
2878         struct tx_ring *txr = adapter->tx_rings;
2879
2880         for (int i = 0; i < adapter->num_queues; i++, txr++)
2881                 ixgbe_setup_transmit_ring(txr);
2882
2883         return (0);
2884 }
2885
2886 /*********************************************************************
2887  *
2888  *  Enable transmit unit.
2889  *
2890  **********************************************************************/
static void
ixgbe_initialize_transmit_units(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	struct ixgbe_hw	*hw = &adapter->hw;

	/* Setup the Base and Length of the Tx Descriptor Ring */

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		u64	tdba = txr->txdma.dma_paddr;
		u32	txctrl;

		/* Program the 64-bit ring base address as two 32-bit halves. */
		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
		       (tdba & 0x00000000ffffffffULL));
		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
		    adapter->num_tx_desc * sizeof(struct ixgbe_legacy_tx_desc));

		/* Setup the HW Tx Head and Tail descriptor pointers */
		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);

		/* Setup Transmit Descriptor Cmd Settings */
		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
		txr->queue_status = IXGBE_QUEUE_IDLE;

		/* Disable Head Writeback */
		/* The TXCTRL register moved after 82598; pick per MAC type. */
		switch (hw->mac.type) {
		case ixgbe_mac_82598EB:
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
			break;
		case ixgbe_mac_82599EB:
		case ixgbe_mac_X540:
		default:
			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
			break;
		}
		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
		switch (hw->mac.type) {
		case ixgbe_mac_82598EB:
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
			break;
		case ixgbe_mac_82599EB:
		case ixgbe_mac_X540:
		default:
			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
			break;
		}

	}

	if (hw->mac.type != ixgbe_mac_82598EB) {
		u32 dmatxctl, rttdcs;
		/* Enable the DMA TX engine (82599/X540 and later). */
		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
		dmatxctl |= IXGBE_DMATXCTL_TE;
		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
		/* Disable arbiter to set MTQC */
		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
		rttdcs |= IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
		/* Re-enable the arbiter once MTQC is programmed. */
		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
	}

	return;
}
2958
2959 /*********************************************************************
2960  *
2961  *  Free all transmit rings.
2962  *
2963  **********************************************************************/
/*
 * Free every TX ring: buffers and descriptor DMA under the ring lock,
 * then the lock itself, and finally the ring array.
 */
static void
ixgbe_free_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		ixgbe_free_transmit_buffers(txr);
		ixgbe_dma_free(adapter, &txr->txdma);
		IXGBE_TX_UNLOCK(txr);
		IXGBE_TX_LOCK_DESTROY(txr);
	}
	kfree(adapter->tx_rings, M_DEVBUF);
}
2978
2979 /*********************************************************************
2980  *
2981  *  Free transmit ring related data structures.
2982  *
2983  **********************************************************************/
static void
ixgbe_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *tx_buffer;
	int		i;

	INIT_DEBUGOUT("free_transmit_ring: begin");

	/* Nothing allocated yet (attach failed early) -- nothing to do. */
	if (txr->tx_buffers == NULL)
		return;

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			/* Buffer still holds an mbuf: unload and free both. */
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		} else if (tx_buffer->map != NULL) {
			/* Map exists without an mbuf: just tear it down. */
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			bus_dmamap_destroy(txr->txtag,
			    tx_buffer->map);
			tx_buffer->map = NULL;
		}
	}
#if 0 /* __FreeBSD_version >= 800000 */
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
	if (txr->tx_buffers != NULL) {
		kfree(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}
3032
3033 /*********************************************************************
3034  *
3035  *  Advanced Context Descriptor setup for VLAN or CSUM
3036  *
3037  **********************************************************************/
3038
/*
 * Build an advanced context descriptor for checksum offload and/or VLAN
 * tag insertion for mbuf "mp".  Consumes one descriptor from the ring.
 * Returns TRUE when the data descriptors should request offload, FALSE
 * when no offload applies (caller then skips the offload bits).
 */
static bool
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ixgbe_tx_buf        *tx_buffer;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	struct ether_vlan_header *eh;
	struct ip *ip;
	struct ip6_hdr *ip6;
	int  ehdrlen, ip_hlen = 0;
	u16	etype;
	u8	ipproto = 0;
	bool	offload = TRUE;
	int ctxd = txr->next_avail_desc;
	u16 vtag = 0;


	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must 
	** be placed into the descriptor itself.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vlantag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	} else if (offload == FALSE)
		/* Neither VLAN nor checksum work: no context desc needed. */
		return FALSE;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	/* L3: record header length and protocol for the descriptor.
	 * NOTE(review): assumes the L3 header is contiguous in the first
	 * mbuf (no m_len check here) -- confirm callers guarantee this. */
	switch (etype) {
		case ETHERTYPE_IP:
			ip = (struct ip *)(mp->m_data + ehdrlen);
			ip_hlen = ip->ip_hl << 2;
			ipproto = ip->ip_p;
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
			break;
		case ETHERTYPE_IPV6:
			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
			ip_hlen = sizeof(struct ip6_hdr);
			/* XXX-BZ this will go badly in case of ext hdrs. */
			ipproto = ip6->ip6_nxt;
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
			break;
		default:
			offload = FALSE;
			break;
	}

	vlan_macip_lens |= ip_hlen;
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* L4: request the matching checksum type if the stack asked. */
	switch (ipproto) {
		case IPPROTO_TCP:
			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			break;

		case IPPROTO_UDP:
			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
			break;

#if 0
		case IPPROTO_SCTP:
			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
			break;
#endif
		default:
			offload = FALSE;
			break;
	}

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* Context descriptors carry no buffer of their own. */
	tx_buffer->m_head = NULL;
	tx_buffer->eop_index = -1;

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == adapter->num_tx_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (offload);
}
3151
3152 /**********************************************************************
3153  *
3154  *  Setup work for hardware segmentation offload (TSO) on
3155  *  adapters using advanced tx descriptors
3156  *
3157  **********************************************************************/
static bool
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *paylen,
    u32 *olinfo_status)
{
        struct adapter *adapter = txr->adapter;
        struct ixgbe_adv_tx_context_desc *TXD;
        struct ixgbe_tx_buf        *tx_buffer;
        u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
        u16 vtag = 0, eh_type;
        u32 mss_l4len_idx = 0, len;
        int ctxd, ehdrlen, ip_hlen, tcp_hlen;
        struct ether_vlan_header *eh;
#if 0 /* IPv6 TSO */
#ifdef INET6
        struct ip6_hdr *ip6;
#endif
#endif
#ifdef INET
        struct ip *ip;
#endif
        struct tcphdr *th;


        /*
         * Determine where frame payload starts.
         * Jump over vlan headers if already present
         */
        eh = mtod(mp, struct ether_vlan_header *);
        if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
                ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
                eh_type = eh->evl_proto;
        } else {
                ehdrlen = ETHER_HDR_LEN;
                eh_type = eh->evl_encap_proto;
        }

        /* Ensure we have at least the IP+TCP header in the first mbuf. */
        len = ehdrlen + sizeof(struct tcphdr);
        switch (ntohs(eh_type)) {
#if 0 /* IPv6 TSO */
#ifdef INET6
        case ETHERTYPE_IPV6:
                if (mp->m_len < len + sizeof(struct ip6_hdr))
                        return FALSE;
                ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
                /* XXX-BZ For now we do not pretend to support ext. hdrs. */
                if (ip6->ip6_nxt != IPPROTO_TCP)
                        return FALSE;
                ip_hlen = sizeof(struct ip6_hdr);
                th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
                th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
                type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
                break;
#endif
#endif
#ifdef INET
        case ETHERTYPE_IP:
                if (mp->m_len < len + sizeof(struct ip))
                        return FALSE;
                ip = (struct ip *)(mp->m_data + ehdrlen);
                if (ip->ip_p != IPPROTO_TCP)
                        return FALSE;
                /*
                 * Hardware computes the IPv4 checksum for TSO segments,
                 * so the software value must be zeroed first.
                 */
                ip->ip_sum = 0;
                ip_hlen = ip->ip_hl << 2;
                /*
                 * NOTE(review): the m_len check above only covered
                 * sizeof(struct ip); if IP options are present
                 * (ip_hlen > 20) 'th' may point past the verified
                 * region of the first mbuf - confirm callers pull up
                 * the full header chain before TSO.
                 */
                th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
                /* Seed the TCP pseudo-header checksum (no length). */
                th->th_sum = in_pseudo(ip->ip_src.s_addr,
                    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
                type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
                /* Tell transmit desc to also do IPv4 checksum. */
                *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
                break;
#endif
        default:
                panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
                    __func__, ntohs(eh_type));
                break;
        }

        /* Claim the next descriptor slot for the context descriptor. */
        ctxd = txr->next_avail_desc;
        tx_buffer = &txr->tx_buffers[ctxd];
        TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

        tcp_hlen = th->th_off << 2;

        /* This is used in the transmit desc in encap */
        *paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

        /* VLAN MACLEN IPLEN */
        if (mp->m_flags & M_VLANTAG) {
                vtag = htole16(mp->m_pkthdr.ether_vlantag);
                vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
        }

        vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
        vlan_macip_lens |= ip_hlen;
        TXD->vlan_macip_lens |= htole32(vlan_macip_lens);

        /* ADV DTYPE TUCMD */
        type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
        type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
        TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);

        /* MSS L4LEN IDX */
        mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
        mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
        TXD->mss_l4len_idx = htole32(mss_l4len_idx);

        TXD->seqnum_seed = htole32(0);
        /* Context descriptor carries no mbuf; nothing to free at txeof. */
        tx_buffer->m_head = NULL;
        tx_buffer->eop_index = -1;

        /* We've consumed one descriptor, advance the ring state. */
        if (++ctxd == adapter->num_tx_desc)
                ctxd = 0;

        txr->tx_avail--;
        txr->next_avail_desc = ctxd;
        return TRUE;
}
3276
3277 #ifdef IXGBE_FDIR
3278 /*
3279 ** This routine parses packet headers so that Flow
3280 ** Director can make a hashed filter table entry 
3281 ** allowing traffic flows to be identified and kept
3282 ** on the same cpu.  This would be a performance
3283 ** hit, but we only do it at IXGBE_FDIR_RATE of
3284 ** packets.
3285 */
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
        struct adapter                  *adapter = txr->adapter;
        struct ix_queue                 *que;
        struct ip                       *ip;
        struct tcphdr                   *th;
        struct udphdr                   *uh;
        struct ether_vlan_header        *eh;
        union ixgbe_atr_hash_dword      input = {.dword = 0}; 
        union ixgbe_atr_hash_dword      common = {.dword = 0}; 
        int                             ehdrlen, ip_hlen;
        u16                             etype;

        /* Skip over a VLAN header if one is encapsulated in the frame. */
        eh = mtod(mp, struct ether_vlan_header *);
        if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
                ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
                etype = eh->evl_proto;
        } else {
                ehdrlen = ETHER_HDR_LEN;
                etype = eh->evl_encap_proto;
        }

        /* Only handling IPv4 */
        if (etype != htons(ETHERTYPE_IP))
                return;

        ip = (struct ip *)(mp->m_data + ehdrlen);
        ip_hlen = ip->ip_hl << 2;

        /* check if we're UDP or TCP */
        switch (ip->ip_p) {
        case IPPROTO_TCP:
                th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
                /* src and dst are inverted */
                common.port.dst ^= th->th_sport;
                common.port.src ^= th->th_dport;
                input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
                break;
        case IPPROTO_UDP:
                uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
                /* src and dst are inverted */
                common.port.dst ^= uh->uh_sport;
                common.port.src ^= uh->uh_dport;
                input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
                break;
        default:
                /* Not TCP or UDP: no signature filter for this packet. */
                return;
        }

        /*
         * NOTE(review): other code paths in this file use
         * m_pkthdr.ether_vlantag (see the TSO path); confirm
         * 'ether_vtag' is the correct field name on this platform,
         * since this function is only compiled under IXGBE_FDIR.
         */
        input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
        if (mp->m_pkthdr.ether_vtag)
                common.flex_bytes ^= htons(ETHERTYPE_VLAN);
        else
                common.flex_bytes ^= etype;
        common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

        que = &adapter->queues[txr->me];
        /*
        ** This assumes the Rx queue and Tx
        ** queue are bound to the same CPU
        */
        ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
            input, common, que->msix);
}
3351 #endif /* IXGBE_FDIR */
3352
3353 /**********************************************************************
3354  *
3355  *  Examine each tx_buffer in the used queue. If the hardware is done
3356  *  processing the packet then free associated resources. The
3357  *  tx_buffer is put back on the free queue.
3358  *
3359  **********************************************************************/
3360 static void
3361 ixgbe_txeof(struct tx_ring *txr)
3362 {
3363         struct adapter  *adapter = txr->adapter;
3364         struct ifnet    *ifp = adapter->ifp;
3365         u32     first, last, done, processed;
3366         struct ixgbe_tx_buf *tx_buffer;
3367         struct ixgbe_legacy_tx_desc *tx_desc, *eop_desc;
3368
3369         IXGBE_TX_LOCK_ASSERT(txr);
3370
3371 #ifdef DEV_NETMAP
3372         if (ifp->if_capenable & IFCAP_NETMAP) {
3373                 struct netmap_adapter *na = NA(ifp);
3374                 struct netmap_kring *kring = &na->tx_rings[txr->me];
3375
3376                 tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
3377
3378                 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3379                     BUS_DMASYNC_POSTREAD);
3380                 /*
3381                  * In netmap mode, all the work is done in the context
3382                  * of the client thread. Interrupt handlers only wake up
3383                  * clients, which may be sleeping on individual rings
3384                  * or on a global resource for all rings.
3385                  * To implement tx interrupt mitigation, we wake up the client
3386                  * thread roughly every half ring, even if the NIC interrupts
3387                  * more frequently. This is implemented as follows:
3388                  * - ixgbe_txsync() sets kring->nr_kflags with the index of
3389                  *   the slot that should wake up the thread (nkr_num_slots
3390                  *   means the user thread should not be woken up);
3391                  * - the driver ignores tx interrupts unless netmap_mitigate=0
3392                  *   or the slot has the DD bit set.
3393                  *
3394                  * When the driver has separate locks, we need to
3395                  * release and re-acquire txlock to avoid deadlocks.
3396                  * XXX see if we can find a better way.
3397                  */
3398                 if (!netmap_mitigate ||
3399                     (kring->nr_kflags < kring->nkr_num_slots &&
3400                      tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
3401                         kring->nr_kflags = kring->nkr_num_slots;
3402                         selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3403                         IXGBE_TX_UNLOCK(txr);
3404                         IXGBE_CORE_LOCK(adapter);
3405                         selwakeuppri(&na->tx_si, PI_NET);
3406                         IXGBE_CORE_UNLOCK(adapter);
3407                         IXGBE_TX_LOCK(txr);
3408                 }
3409                 return FALSE;
3410         }
3411 #endif /* DEV_NETMAP */
3412
3413         if (txr->tx_avail == adapter->num_tx_desc) {
3414                 txr->queue_status = IXGBE_QUEUE_IDLE;
3415                 return;
3416         }
3417
3418         processed = 0;
3419         first = txr->next_to_clean;
3420         tx_buffer = &txr->tx_buffers[first];
3421         /* For cleanup we just use legacy struct */
3422         tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3423         last = tx_buffer->eop_index;
3424         if (last == -1)
3425                 return;
3426         eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3427
3428         /*
3429         ** Get the index of the first descriptor
3430         ** BEYOND the EOP and call that 'done'.
3431         ** I do this so the comparison in the
3432         ** inner while loop below can be simple
3433         */
3434         if (++last == adapter->num_tx_desc) last = 0;
3435         done = last;
3436
3437         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3438             BUS_DMASYNC_POSTREAD);
3439         /*
3440         ** Only the EOP descriptor of a packet now has the DD
3441         ** bit set, this is what we look for...
3442         */
3443         while (eop_desc->upper.fields.status & IXGBE_TXD_STAT_DD) {
3444                 /* We clean the range of the packet */
3445                 while (first != done) {
3446                         tx_desc->upper.data = 0;
3447                         tx_desc->lower.data = 0;
3448                         tx_desc->buffer_addr = 0;
3449                         ++txr->tx_avail;
3450                         ++processed;
3451
3452                         if (tx_buffer->m_head) {
3453                                 txr->bytes +=
3454                                     tx_buffer->m_head->m_pkthdr.len;
3455                                 bus_dmamap_sync(txr->txtag,
3456                                     tx_buffer->map,
3457                                     BUS_DMASYNC_POSTWRITE);
3458                                 bus_dmamap_unload(txr->txtag,
3459                                     tx_buffer->map);
3460                                 m_freem(tx_buffer->m_head);
3461                                 tx_buffer->m_head = NULL;
3462                                 tx_buffer->map = NULL;
3463                         }
3464                         tx_buffer->eop_index = -1;
3465                         txr->watchdog_time = ticks;
3466
3467                         if (++first == adapter->num_tx_desc)
3468                                 first = 0;
3469
3470                         tx_buffer = &txr->tx_buffers[first];
3471                         tx_desc =
3472                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
3473                 }
3474                 ++txr->packets;
3475                 ++ifp->if_opackets;
3476                 /* See if there is more work now */
3477                 last = tx_buffer->eop_index;
3478                 if (last != -1) {
3479                         eop_desc =
3480                             (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
3481                         /* Get next done point */
3482                         if (++last == adapter->num_tx_desc) last = 0;
3483                         done = last;
3484                 } else
3485                         break;
3486         }
3487         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3488             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3489
3490         txr->next_to_clean = first;
3491
3492         /*
3493         ** Watchdog calculation, we know there's
3494         ** work outstanding or the first return
3495         ** would have been taken, so none processed
3496         ** for too long indicates a hang.
3497         */
3498         if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3499                 txr->queue_status = IXGBE_QUEUE_HUNG;
3500
3501         /* With a minimum free clear the depleted state bit.  */
3502         if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
3503                 txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;
3504
3505         if (txr->tx_avail == adapter->num_tx_desc) {
3506                 txr->queue_status = IXGBE_QUEUE_IDLE;
3507         }
3508 }
3509
3510 /*********************************************************************
3511  *
3512  *  Refresh mbuf buffers for RX descriptor rings
3513  *   - now keeps its own state so discards due to resource
3514  *     exhaustion are unnecessary, if an mbuf cannot be obtained
3515  *     it just returns, keeping its placeholder, thus it can simply
3516  *     be recalled to try again.
3517  *
3518  **********************************************************************/
3519 static void
3520 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3521 {
3522         struct adapter          *adapter = rxr->adapter;
3523         bus_dma_segment_t       hseg[1];
3524         bus_dma_segment_t       pseg[1];
3525         struct ixgbe_rx_buf     *rxbuf;
3526         struct mbuf             *mh, *mp;
3527         int                     i, j, nsegs, error;
3528         bool                    refreshed = FALSE;
3529
3530         i = j = rxr->next_to_refresh;
3531         /* Control the loop with one beyond */
3532         if (++j == adapter->num_rx_desc)
3533                 j = 0;
3534
3535         while (j != limit) {
3536                 rxbuf = &rxr->rx_buffers[i];
3537                 if (rxr->hdr_split == FALSE)
3538                         goto no_split;
3539
3540                 if (rxbuf->m_head == NULL) {
3541                         mh = m_gethdr(MB_DONTWAIT, MT_DATA);
3542                         if (mh == NULL)
3543                                 goto update;
3544                 } else
3545                         mh = rxbuf->m_head;
3546
3547                 mh->m_pkthdr.len = mh->m_len = MHLEN;
3548                 mh->m_len = MHLEN;
3549                 mh->m_flags |= M_PKTHDR;
3550                 /* Get the memory mapping */
3551                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
3552                     rxbuf->hmap, mh, hseg, 1, &nsegs, BUS_DMA_NOWAIT);
3553                 if (error != 0) {
3554                         kprintf("Refresh mbufs: hdr dmamap load"
3555                             " failure - %d\n", error);
3556                         m_free(mh);
3557                         rxbuf->m_head = NULL;
3558                         goto update;
3559                 }
3560                 rxbuf->m_head = mh;
3561                 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3562                     BUS_DMASYNC_PREREAD);
3563                 rxr->rx_base[i].read.hdr_addr =
3564                     htole64(hseg[0].ds_addr);
3565
3566 no_split:
3567                 if (rxbuf->m_pack == NULL) {
3568                         mp = m_getjcl(MB_DONTWAIT, MT_DATA,
3569                             M_PKTHDR, adapter->rx_mbuf_sz);
3570                         if (mp == NULL)
3571                                 goto update;
3572                 } else
3573                         mp = rxbuf->m_pack;
3574
3575                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3576                 /* Get the memory mapping */
3577                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
3578                     rxbuf->pmap, mp, pseg, 1, &nsegs, BUS_DMA_NOWAIT);
3579                 if (error != 0) {
3580                         kprintf("Refresh mbufs: payload dmamap load"
3581                             " failure - %d\n", error);
3582                         m_free(mp);
3583                         rxbuf->m_pack = NULL;
3584                         goto update;
3585                 }
3586                 rxbuf->m_pack = mp;
3587                 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3588                     BUS_DMASYNC_PREREAD);
3589                 rxr->rx_base[i].read.pkt_addr =
3590                     htole64(pseg[0].ds_addr);
3591
3592                 refreshed = TRUE;
3593                 /* Next is precalculated */
3594                 i = j;
3595                 rxr->next_to_refresh = i;
3596                 if (++j == adapter->num_rx_desc)
3597                         j = 0;
3598         }
3599 update:
3600         if (refreshed) /* Update hardware tail index */
3601                 IXGBE_WRITE_REG(&adapter->hw,
3602                     IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3603         return;
3604 }
3605
3606 /*********************************************************************
3607  *
3608  *  Allocate memory for rx_buffer structures. Since we use one
3609  *  rx_buffer per received packet, the maximum number of rx_buffer's
3610  *  that we'll need is equal to the number of receive descriptors
3611  *  that we've allocated.
3612  *
3613  **********************************************************************/
3614 static int
3615 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3616 {
3617         struct  adapter         *adapter = rxr->adapter;
3618         device_t                dev = adapter->dev;
3619         struct ixgbe_rx_buf     *rxbuf;
3620         int                     i, bsize, error;
3621
3622         bsize = sizeof(struct ixgbe_rx_buf) * adapter->num_rx_desc;
3623         if (!(rxr->rx_buffers =
3624             (struct ixgbe_rx_buf *) kmalloc(bsize,
3625             M_DEVBUF, M_NOWAIT | M_ZERO))) {
3626                 device_printf(dev, "Unable to allocate rx_buffer memory\n");
3627                 error = ENOMEM;
3628                 goto fail;
3629         }
3630
3631         if ((error = bus_dma_tag_create(NULL,   /* parent */
3632                                    1, 0,        /* alignment, bounds */
3633                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3634                                    BUS_SPACE_MAXADDR,   /* highaddr */
3635                                    NULL, NULL,          /* filter, filterarg */
3636                                    MSIZE,               /* maxsize */
3637                                    1,                   /* nsegments */
3638                                    MSIZE,               /* maxsegsize */
3639                                    0,                   /* flags */
3640                                    &rxr->htag))) {
3641                 device_printf(dev, "Unable to create RX DMA tag\n");
3642                 goto fail;
3643         }
3644
3645         if ((error = bus_dma_tag_create(NULL,   /* parent */
3646                                    1, 0,        /* alignment, bounds */
3647                                    BUS_SPACE_MAXADDR,   /* lowaddr */
3648                                    BUS_SPACE_MAXADDR,   /* highaddr */
3649                                    NULL, NULL,          /* filter, filterarg */
3650                                    MJUM16BYTES,         /* maxsize */
3651                                    1,                   /* nsegments */
3652                                    MJUM16BYTES,         /* maxsegsize */
3653                                    0,                   /* flags */
3654                                    &rxr->ptag))) {
3655                 device_printf(dev, "Unable to create RX DMA tag\n");
3656                 goto fail;
3657         }
3658
3659         for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3660                 rxbuf = &rxr->rx_buffers[i];
3661                 error = bus_dmamap_create(rxr->htag,
3662                     BUS_DMA_NOWAIT, &rxbuf->hmap);
3663                 if (error) {
3664                         device_printf(dev, "Unable to create RX head map\n");
3665                         goto fail;
3666                 }
3667                 error = bus_dmamap_create(rxr->ptag,
3668                     BUS_DMA_NOWAIT, &rxbuf->pmap);
3669                 if (error) {
3670                         device_printf(dev, "Unable to create RX pkt map\n");
3671                         goto fail;
3672                 }
3673         }
3674
3675         return (0);
3676
3677 fail:
3678         /* Frees all, but can handle partial completion */
3679         ixgbe_free_receive_structures(adapter);
3680         return (error);
3681 }
3682
3683 /*
3684 ** Used to detect a descriptor that has
3685 ** been merged by Hardware RSC.
3686 */
3687 static inline u32
3688 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3689 {
3690         return (le32toh(rx->wb.lower.lo_dword.data) &
3691             IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3692 }
3693
3694 /*********************************************************************
3695  *
3696  *  Initialize Hardware RSC (LRO) feature on 82599
3697  *  for an RX ring, this is toggled by the LRO capability
3698  *  even though it is transparent to the stack.
3699  *
3700  **********************************************************************/
3701 #if 0   /* NET_LRO */
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
        struct  adapter         *adapter = rxr->adapter;
        struct  ixgbe_hw        *hw = &adapter->hw;
        u32                     rscctrl, rdrxctl;

        rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
        rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
        /* Under netmap, CRC stripping is only set when ix_crcstrip asks. */
        if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
        rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
        rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
        IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

        /* Enable RSC on this ring. */
        rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
        rscctrl |= IXGBE_RSCCTL_RSCEN;
        /*
        ** Limit the total number of descriptors that
        ** can be combined, so it does not exceed 64K
        */
        if (adapter->rx_mbuf_sz == MCLBYTES)
                rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
        else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
                rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
        else if (adapter->rx_mbuf_sz == MJUM9BYTES)
                rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
        else  /* Using 16K cluster */
                rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

        IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

        /* Enable TCP header recognition */
        IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
            (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
            IXGBE_PSRTYPE_TCPHDR));

        /* Disable RSC for ACK packets */
        IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
            (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

        rxr->hw_rsc = TRUE;
}
3746 #endif
3747
3748 static void     
3749 ixgbe_free_receive_ring(struct rx_ring *rxr)
3750
3751         struct  adapter         *adapter;
3752         struct ixgbe_rx_buf       *rxbuf;
3753         int i;
3754
3755         adapter = rxr->adapter;
3756         for (i = 0; i < adapter->num_rx_desc; i++) {
3757                 rxbuf = &rxr->rx_buffers[i];
3758                 if (rxbuf->m_head != NULL) {
3759                         bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3760                             BUS_DMASYNC_POSTREAD);
3761                         bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3762                         rxbuf->m_head->m_flags |= M_PKTHDR;
3763                         m_freem(rxbuf->m_head);
3764                 }
3765                 if (rxbuf->m_pack != NULL) {
3766                         bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3767                             BUS_DMASYNC_POSTREAD);
3768                         bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3769                         rxbuf->m_pack->m_flags |= M_PKTHDR;
3770                         m_freem(rxbuf->m_pack);
3771                 }
3772                 rxbuf->m_head = NULL;
3773                 rxbuf->m_pack = NULL;
3774         }
3775 }
3776
3777
3778 /*********************************************************************
3779  *
3780  *  Initialize a receive ring and its buffers.
3781  *
3782  **********************************************************************/
3783 static int
3784 ixgbe_setup_receive_ring(struct rx_ring *rxr)
3785 {
3786         struct  adapter         *adapter;
3787         struct ifnet            *ifp;
3788         device_t                dev;
3789         struct ixgbe_rx_buf     *rxbuf;
3790         bus_dma_segment_t       pseg[1], hseg[1];
3791 #if 0   /* NET_LRO */
3792         struct lro_ctrl         *lro = &rxr->lro;
3793 #endif
3794         int                     rsize, nsegs, error = 0;
3795 #ifdef DEV_NETMAP
3796         struct netmap_adapter *na = NA(rxr->adapter->ifp);
3797         struct netmap_slot *slot;
3798 #endif /* DEV_NETMAP */
3799
3800         adapter = rxr->adapter;
3801         ifp = adapter->ifp;
3802         dev = adapter->dev;
3803
3804         /* Clear the ring contents */
3805         IXGBE_RX_LOCK(rxr);
3806 #ifdef DEV_NETMAP
3807         /* same as in ixgbe_setup_transmit_ring() */
3808         slot = netmap_reset(na, NR_RX, rxr->me, 0);
3809 #endif /* DEV_NETMAP */
3810         rsize = roundup2(adapter->num_rx_desc *
3811             sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3812         bzero((void *)rxr->rx_base, rsize);
3813
3814         /* Free current RX buffer structs and their mbufs */
3815         ixgbe_free_receive_ring(rxr);
3816
3817         /* Configure header split? */
3818         if (ixgbe_header_split)
3819                 rxr->hdr_split = TRUE;
3820
3821         /* Now replenish the mbufs */
3822         for (int j = 0; j != adapter->num_rx_desc; ++j) {
3823                 struct mbuf     *mh, *mp;
3824
3825                 rxbuf = &rxr->rx_buffers[j];
3826 #ifdef DEV_NETMAP
3827                 /*
3828                  * In netmap mode, fill the map and set the buffer
3829                  * address in the NIC ring, considering the offset
3830                  * between the netmap and NIC rings (see comment in
3831                  * ixgbe_setup_transmit_ring() ). No need to allocate
3832                  * an mbuf, so end the block with a continue;
3833                  */
3834                 if (slot) {
3835                         int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
3836                         uint64_t paddr;
3837                         void *addr;
3838
3839                         addr = PNMB(slot + sj, &paddr);
3840                         netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
3841                         /* Update descriptor */
3842                         rxr->rx_base[j].read.pkt_addr = htole64(paddr);
3843                         continue;
3844                 }
3845 #endif /* DEV_NETMAP */
3846                 /*
3847                 ** Don't allocate mbufs if not
3848                 ** doing header split, its wasteful
3849                 */ 
3850                 if (rxr->hdr_split == FALSE)
3851                         goto skip_head;
3852
3853                 /* First the header */
3854                 rxbuf->m_head = m_gethdr(MB_DONTWAIT, MT_DATA);
3855                 if (rxbuf->m_head == NULL) {
3856                         error = ENOBUFS;
3857                         goto fail;
3858                 }
3859                 m_adj(rxbuf->m_head, ETHER_ALIGN);
3860                 mh = rxbuf->m_head;
3861                 mh->m_len = mh->m_pkthdr.len = MHLEN;
3862                 mh->m_flags |= M_PKTHDR;
3863                 /* Get the memory mapping */
3864                 error = bus_dmamap_load_mbuf_segment(rxr->htag,
3865                     rxbuf->hmap, rxbuf->m_head, hseg, 1,
3866                     &nsegs, BUS_DMA_NOWAIT);
3867
3868                 if (error != 0) /* Nothing elegant to do here */
3869                         goto fail;
3870                 bus_dmamap_sync(rxr->htag,
3871                     rxbuf->hmap, BUS_DMASYNC_PREREAD);
3872                 /* Update descriptor */
3873                 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3874
3875 skip_head:
3876                 /* Now the payload cluster */
3877                 rxbuf->m_pack = m_getjcl(MB_DONTWAIT, MT_DATA,
3878                     M_PKTHDR, adapter->rx_mbuf_sz);
3879                 if (rxbuf->m_pack == NULL) {
3880                         error = ENOBUFS;
3881                         goto fail;
3882                 }
3883                 mp = rxbuf->m_pack;
3884                 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3885                 /* Get the memory mapping */
3886                 error = bus_dmamap_load_mbuf_segment(rxr->ptag,
3887                     rxbuf->pmap, mp, hseg, 1,
3888                     &nsegs, BUS_DMA_NOWAIT);
3889                 if (error != 0)
3890                         goto fail;
3891                 bus_dmamap_sync(rxr->ptag,
3892                     rxbuf->pmap, BUS_DMASYNC_PREREAD);
3893                 /* Update descriptor */
3894                 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3895         }
3896
3897
3898         /* Setup our descriptor indices */
3899         rxr->next_to_check = 0;
3900         rxr->next_to_refresh = 0;
3901         rxr->lro_enabled = FALSE;
3902         rxr->rx_split_packets = 0;
3903         rxr->rx_bytes = 0;
3904         rxr->discard = FALSE;
3905         rxr->vtag_strip = FALSE;
3906
3907         bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3908             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3909
3910         /*
3911         ** Now set up the LRO interface:
3912         ** 82598 uses software LRO, the
3913         ** 82599 and X540 use a hardware assist.
3914         */
3915 #if 0 /* NET_LRO */
3916         if ((adapter->hw.mac.type != ixgbe_mac_82598EB) &&
3917             (ifp->if_capenable & IFCAP_RXCSUM) &&
3918             (ifp->if_capenable & IFCAP_LRO))
3919                 ixgbe_setup_hw_rsc(rxr);
3920         else if (ifp->if_capenable & IFCAP_LRO) {
3921                 int err = tcp_lro_init(lro);
3922                 if (err) {
3923                         device_printf(dev, "LRO Initialization failed!\n");
3924                         goto fail;
3925                 }
3926                 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
3927                 rxr->lro_enabled = TRUE;
3928                 lro->ifp = adapter->ifp;
3929         }
3930 #endif
3931
3932         IXGBE_RX_UNLOCK(rxr);
3933         return (0);
3934
3935 fail:
3936         ixgbe_free_receive_ring(rxr);
3937         IXGBE_RX_UNLOCK(rxr);
3938         return (error);
3939 }
3940
3941 /*********************************************************************
3942  *
3943  *  Initialize all receive rings.
3944  *
3945  **********************************************************************/
3946 static int
3947 ixgbe_setup_receive_structures(struct adapter *adapter)
3948 {
3949         struct rx_ring *rxr = adapter->rx_rings;
3950         int j;
3951
3952         for (j = 0; j < adapter->num_queues; j++, rxr++)
3953                 if (ixgbe_setup_receive_ring(rxr))
3954                         goto fail;
3955
3956         return (0);
3957 fail:
3958         /*
3959          * Free RX buffers allocated so far, we will only handle
3960          * the rings that completed, the failing case will have
3961          * cleaned up for itself. 'j' failed, so its the terminus.
3962          */
3963         for (int i = 0; i < j; ++i) {
3964                 rxr = &adapter->rx_rings[i];
3965                 ixgbe_free_receive_ring(rxr);
3966         }
3967
3968         return (ENOBUFS);
3969 }
3970
3971 /*********************************************************************
3972  *
3973  *  Setup receive registers and features.
3974  *
3975  **********************************************************************/
/*
 * Shift applied when encoding the header buffer size into the SRRCTL
 * register (header sizes are programmed in coarser-than-byte units --
 * NOTE(review): exact granularity per the 82598/82599/X540 datasheet;
 * confirm against the hardware spec).
 */
#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2

/*
 * Mask used to round the receive buffer size up to the next SRRCTL
 * BSIZEPKT granule of (1 << IXGBE_SRRCTL_BSIZEPKT_SHIFT) bytes, e.g.
 * bufsz = (rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT.
 */
#define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
3979         
3980 static void
3981 ixgbe_initialize_receive_units(struct adapter *adapter)
3982 {
3983         struct  rx_ring *rxr = adapter->rx_rings;
3984         struct ixgbe_hw *hw = &adapter->hw;
3985         struct ifnet   *ifp = adapter->ifp;
3986         u32             bufsz, rxctrl, fctrl, srrctl, rxcsum;
3987         u32             reta, mrqc = 0, hlreg, random[10];
3988
3989
3990         /*
3991          * Make sure receives are disabled while
3992          * setting up the descriptor ring
3993          */
3994         rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
3995         IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
3996             rxctrl & ~IXGBE_RXCTRL_RXEN);
3997
3998         /* Enable broadcasts */
3999         fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4000         fctrl |= IXGBE_FCTRL_BAM;
4001         fctrl |= IXGBE_FCTRL_DPF;
4002         fctrl |= IXGBE_FCTRL_PMCF;
4003         IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4004
4005         /* Set for Jumbo Frames? */
4006         hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4007         if (ifp->if_mtu > ETHERMTU)
4008                 hlreg |= IXGBE_HLREG0_JUMBOEN;
4009         else
4010                 hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4011 #ifdef DEV_NETMAP
4012         /* crcstrip is conditional in netmap (in RDRXCTL too ?) */
4013         if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4014                 hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4015         else
4016                 hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4017 #endif /* DEV_NETMAP */
4018         IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4019
4020         bufsz = (adapter->rx_mbuf_sz +